stash
This commit is contained in:
122
stash/config/scrapers/community/PMVHaven/PMVHaven.py
Normal file
122
stash/config/scrapers/community/PMVHaven/PMVHaven.py
Normal file
@@ -0,0 +1,122 @@
|
||||
import json
|
||||
import sys
|
||||
import requests
|
||||
import re
|
||||
|
||||
try:
|
||||
import py_common.log as log
|
||||
except ModuleNotFoundError:
|
||||
print(
|
||||
"You need to download the folder 'py_common' from the community repo (CommunityScrapers/tree/master/scrapers/py_common)",
|
||||
file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
def fail(message):
    """Log *message* as an error and terminate the script with exit status 1."""
    log.error(message)
    # sys.exit(1) works by raising SystemExit(1); raise it directly.
    raise SystemExit(1)
|
||||
|
||||
def getData(sceneId: str, timeout: float = 30.0):
    """Fetch the raw metadata for one video from the PMVHaven API.

    Sends a POST request to the ``videoInput`` endpoint and returns the
    decoded JSON body. A transport error, an HTTP error status, or a body
    that is not valid JSON is reported through ``fail``, which logs the
    message and exits the process.

    Args:
        sceneId: Video identifier, sent as the ``video`` field of the payload.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON response.
    """
    payload = {
        "video": sceneId,
        "mode": "InitVideo",
        "view": True,
    }
    try:
        # requests has no default timeout; without one a stalled connection
        # would block the scraper indefinitely.
        res = requests.post(
            "https://pmvhaven.com/api/v2/videoInput",
            json=payload,
            timeout=timeout,
        )
        res.raise_for_status()
        # Decoding stays inside the try so that a non-JSON body is reported
        # like any other fetch failure (json() raises a ValueError subclass).
        return res.json()
    except (requests.RequestException, ValueError) as e:
        fail(f"Error fetching data from PMVHaven API: {e}")
|
||||
|
||||
def getIMG(video):
    """Return the last thumbnail URL that is hosted on PMVHaven storage.

    Args:
        video: Mapping for a single video; its optional ``thumbnails`` entry
            is expected to be a list of URL strings.

    Returns:
        The last entry of ``thumbnails`` that starts with
        ``https://storage.pmvhaven.com/``, or an empty string when the list
        is missing, null, empty, or contains no such URL.
    """
    thumbnails = video.get('thumbnails') or []
    # reversed because we want the most recent thumb
    return next(
        (
            thumb
            for thumb in reversed(thumbnails)
            # Skip null / non-string entries instead of crashing on them.
            if isinstance(thumb, str)
            and thumb.startswith("https://storage.pmvhaven.com/")
        ),
        "",
    )
|
||||
|
||||
def getVideoById(sceneId):
    """Fetch a video by ID and convert it into the scraped-scene dictionary.

    Calls ``getData`` and uses the first element of the response's ``video``
    list. If the response carries no video entry, ``fail`` is called, which
    logs the problem and exits the process.

    Args:
        sceneId: Video identifier passed on to ``getData``.

    Returns:
        A dict with the keys ``title``, ``url``, ``image``, ``date``,
        ``details``, ``studio``, ``tags`` and ``performers``.
    """
    data = getData(sceneId)

    # Tolerate a non-dict response and a missing / null / empty "video" entry;
    # all of them mean there is nothing to scrape.
    videos = data.get('video') if isinstance(data, dict) else None
    if not videos:
        fail(f"Video data not found in API response: {data}")

    video = videos[0]
    title = video['title']
    urlTitle = title.replace(' ', '-')

    # The list fields are treated as optional: a missing or null value counts
    # as an empty list rather than aborting the whole scrape.
    tags = (video.get('tags') or []) + (video.get('categories') or [])
    stars = video.get('stars') or []

    return {
        'title': title,
        'url': f"https://pmvhaven.com/video/{urlTitle}_{video['_id']}",
        'image': getIMG(video),
        # Keep only the date part of the ISO timestamp (text before the "T").
        'date': video['isoDate'].split('T')[0],
        'details': video['description'],
        # NOTE(review): this key is capitalised while tags/performers use
        # lower-case 'name'; left unchanged - confirm what the consumer expects.
        'studio': {
            'Name': video['creator'],
        },
        # Names that are blank after stripping are dropped.
        'tags': [{'name': tag.strip()} for tag in tags if tag.strip()],
        'performers': [{'name': star.strip()} for star in stars if star.strip()],
    }
|
||||
|
||||
def sceneByFragment(params):
    '''
    Scrape a scene using the video ID embedded in its title.

    Assumes the SceneID is in the title of the video,
    e.g. "Hot video 12ab3c45de6f7890abc12ff0.mp4" or similar.
    The JSON blob that gets passed through for a script-based sceneByFragment
    scraper doesn't get the filename, unlike the xpath scraper, but the name
    as shown in Stash.

    params: dict decoded from the JSON input; its 'title' entry is searched
    for a run of 24 lower-case alphanumeric characters.

    Returns the scene dict built by getVideoById. Calls fail (which logs and
    exits) when the title is missing/empty or contains no such ID.
    '''
    # .get() so that an absent key reaches the intended error message instead
    # of raising KeyError.
    title = params.get('title')
    if not title:
        fail('JSON blob did not contain title property')

    found = re.search(r"([a-z0-9]{24})", title)
    if not found:
        fail(f"Did not find scene ID from video title {title}")

    return getVideoById(found.group(1))
|
||||
|
||||
|
||||
def sceneByURL(params):
    '''
    Scrape a scene from its PMVHaven video URL.

    This assumes a URL of https://pmvhaven.com/video/{title}_{alphanumericVideoId}
    As of 2024-01-01, this is the only valid video URL format. If this changes in
    the future (i.e. more than one valid URL type, or ID not present in URL) and
    requires falling back to the old cloudscraper method, an xpath of
    //meta[@property="video-id"]/@content
    can be used to pass into the PMVHaven API

    params: dict decoded from the JSON input; its 'url' entry holds the URL.

    Returns the scene dict built by getVideoById. Calls fail (which logs and
    exits) when the URL is missing/empty or no alphanumeric ID follows the
    last underscore.
    '''
    # .get() so that an absent key reaches the intended error message instead
    # of raising KeyError.
    url = params.get('url')
    if not url:
        fail('No URL entered')

    # Drop a fragment, query string or trailing slash so they do not end up
    # inside the extracted ID.
    bareUrl = url.split('#', 1)[0].split('?', 1)[0].rstrip('/')
    sceneId = bareUrl.rsplit('_', 1)[-1]

    if not sceneId or not sceneId.isalnum():
        fail(f"Did not find scene ID from PMVHaven video URL {url}")

    return getVideoById(sceneId)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Entry point: the scrape method name is taken from the first command-line
    # argument and its parameters from a JSON document on stdin. The result is
    # printed to stdout as JSON.
    if len(sys.argv) < 2:
        fail("No scrape method was given on the command line")

    calledFunction = sys.argv[1]

    try:
        params = json.loads(sys.stdin.read())
    except json.JSONDecodeError as e:
        fail(f"Could not parse JSON input from stdin: {e}")

    # Dispatch table of the scrape methods this script implements.
    handlers = {
        'sceneByURL': sceneByURL,
        'sceneByFragment': sceneByFragment,
    }
    handler = handlers.get(calledFunction)
    if handler is None:
        fail("This scrape method has not been implemented!")

    print(json.dumps(handler(params)))
|
||||
Reference in New Issue
Block a user