import json import sys import requests import re try: import py_common.log as log except ModuleNotFoundError: print( "You need to download the folder 'py_common' from the community repo (CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) sys.exit(1) def fail(message): log.error(message) sys.exit(1) def getData(sceneId: str): try: req = requests.post("https://pmvhaven.com/api/v2/videoInput", json={ "video": sceneId, "mode": "InitVideo", "view": True }) except Exception as e: fail(f"Error fetching data from PMVHaven API: {e}") return req.json() def getIMG(video): # reversed because we want the most recent thumb for item in reversed(video['thumbnails']): if item.startswith("https://storage.pmvhaven.com/"): return item return "" def getVideoById(sceneId): data = getData(sceneId) if not 'video' in data or len(data['video']) < 1: fail(f"Video data not found in API response: {data}") video = data['video'][0] tags = video['tags'] + video['categories'] urlTitle = video['title'].replace(' ', '-') return { 'title': video['title'], 'url': f"https://pmvhaven.com/video/{urlTitle}_{video['_id']}", 'image': getIMG(video), 'date': video['isoDate'].split('T')[0], 'details': video['description'], 'studio': { 'Name': video['creator'] }, 'tags':[ { 'name': x.strip() } for x in tags ], 'performers': [ { 'name': x.strip() } for x in video['stars'] ] } ''' Assumes the SceneID is in the title of the video, e.g. "Hot video 12ab3c45de6f7890abc12ff0.mp4" or similar. The json blob that gets passed though for script based sceneByFragment scaper doesn't get the filename, unlike the xpath scraper, but the name as shown in Stash. ''' def sceneByFragment(params): if not params['title']: fail('JSON blob did not contain title property') regex = re.search(r"([a-z0-9]{24})", params['title']) if not regex: fail(f"Did not find scene ID from video title {params['title']}") sceneId = regex.group(1) data = getVideoById(sceneId) return data ''' This assumes a URL of https://pmvhaven.com/video/{title}_{alphanumericVideoId} As of 2024-01-01, this is the only valid video URL format. If this changes in the future (i.e. more than one valid URL type, or ID not present in URL) and requires falling back to the old cloudscraper method, an xpath of //meta[@property="video-id"]/@content can be used to pass into the PMVHaven API ''' def sceneByURL(params): if not params['url']: fail('No URL entered') sceneId = params['url'].split('_')[-1] if not sceneId or not sceneId.isalnum(): fail(f"Did not find scene ID from PMVStash video URL {params['url']}") data = getVideoById(sceneId) return data if __name__ == "__main__": calledFunction = sys.argv[1] params = json.loads(sys.stdin.read()) match calledFunction: case 'sceneByURL': print(json.dumps(sceneByURL(params))) case 'sceneByFragment': print(json.dumps(sceneByFragment(params))) case _: fail("This scrape method has not been implemented!")