import json
import os
import sys
from urllib.parse import urlparse
from datetime import datetime, timedelta

# to import from a parent directory we need to add that directory to the system path
csd = os.path.dirname(os.path.realpath(__file__))  # get current script directory
parent = os.path.dirname(csd)  # parent directory (should be the scrapers one)
sys.path.append(parent)  # add parent dir to sys path so that we can import py_common from there

try:
    import requests
except ModuleNotFoundError:
    print("You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", file=sys.stderr)
    print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", file=sys.stderr)
    # A missing dependency is a failure: exit non-zero instead of reporting success.
    sys.exit(1)

try:
    import py_common.log as log
except ModuleNotFoundError:
    print("You need to download the folder 'py_common' from the community repo (CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr)
    # Same as above: without the logging helper the script cannot run.
    sys.exit(1)

# Max number of scenes that a site can return for the search.
MAX_SCENES = 6

# Marker
# If you want to create a marker while Scraping.
CREATE_MARKER = False
# Only create marker if the durations match (API vs Stash)
MARKER_DURATION_MATCH = True
# Sometimes the API duration is 0/1, so we can't really know if this matches. True if you want to create anyways
MARKER_DURATION_UNSURE = True
# Max allowed difference (seconds) in scene length between Stash & API.
MARKER_SEC_DIFF = 10

# Tags you don't want to see in the Scraper window.
IGNORE_TAGS = ["Sex", "Feature", "HD", "Big Dick"]
# Tags you want to add in the Scraper window.
FIXED_TAGS = ""
# Check the SSL Certificate.
CHECK_SSL_CERT = True
# Local folder with JSON inside (Only used if scene isn't found from the API)
LOCAL_PATH = r""
SERVER_IP = "http://localhost:9999"
# API key (Settings > Configuration > Authentication)
STASH_API = ""
# Automatically reattempt GraphQL queries to Vixen sites which fail with a 403 response
MAX_403_REATTEMPTS = 20

SERVER_URL = SERVER_IP + "/graphql"


def _graphql_errors(result):
    """Return the list of errors carried by a decoded GraphQL response.

    Looks at the top-level ``errors`` list first and falls back to the
    nested ``error.errors`` shape this script originally checked for.
    Returns an empty list when neither is present.
    """
    if not isinstance(result, dict):
        return []
    errors = result.get("errors")
    if not errors:
        legacy = result.get("error")
        errors = legacy.get("errors") if isinstance(legacy, dict) else None
    return list(errors) if errors else []


def _widest_image_src(images):
    """Return the ``src`` of the widest entry in ``images``, or None.

    Entries whose width is missing or not greater than 0 are never selected;
    on ties the first entry wins.
    """
    best_src = None
    best_width = 0
    for image in images or []:
        width = image.get('width') or 0
        if width > best_width:
            best_src = image.get('src')
            best_width = width
    return best_src


def callGraphQL(query, variables=None):
    """Send a GraphQL request to the Stash server at SERVER_URL.

    Args:
        query: GraphQL document to send.
        variables: optional dict of variables for the document.

    Returns:
        The ``data`` member of the response, or None when the request
        fails, is unauthorised, reports errors, or carries no data.
        Failures are logged, never raised.
    """
    headers = {
        "Accept-Encoding": "gzip, deflate, br",
        "Content-Type": "application/json",
        "Accept": "application/json",
        "Connection": "keep-alive",
        "DNT": "1",
        "ApiKey": STASH_API
    }
    # Named "payload" so the module-level json import is not shadowed.
    payload = {'query': query}
    if variables is not None:
        payload['variables'] = variables
    try:
        response = requests.post(SERVER_URL, json=payload, headers=headers)
        if response.status_code == 401:
            log.error("[GraphQL] HTTP Error 401, Unauthorised.")
            return None
        if response.status_code != 200:
            raise ConnectionError("GraphQL query failed:{} - {}".format(response.status_code, response.content))
        result = response.json()
        errors = _graphql_errors(result)
        if errors:
            raise Exception("GraphQL error: {}".format(errors[0]))
        return result.get("data") or None
    except Exception as err:
        # Boundary handler: callers only distinguish "data" from "no data".
        log.error(err)
        return None


def graphql_findTagbyName(name):
    """Return the id of the Stash tag whose name or alias equals ``name``.

    The comparison is case-insensitive; ``name`` is also stripped of
    surrounding whitespace. Returns None when no tag matches, when ``name``
    is empty, or when the tag list cannot be fetched.
    """
    if not name:
        return None
    query = """
    query {
        allTags {
            id
            name
            aliases
        }
    }
    """
    result = callGraphQL(query)
    if not result:
        return None
    normalized_name = name.lower().strip()
    for tag in result.get("allTags") or []:
        if tag["name"].lower() == normalized_name:
            return tag["id"]
        for alias in tag.get("aliases") or []:
            if alias.lower() == normalized_name:
                return tag["id"]
    return None


def graphql_createMarker(scene_id, title, main_tag, seconds, tags=None):
    """Create a scene marker in Stash.

    Args:
        scene_id: id of the Stash scene the marker belongs to.
        title: marker title.
        main_tag: name (or alias) of the primary tag; it must already exist.
        seconds: position of the marker in the scene.
        tags: optional list of additional tag ids (defaults to none).

    Returns:
        The ``data`` of the mutation response, or None when the primary tag
        is unknown or the request fails.
    """
    main_tag_id = graphql_findTagbyName(main_tag)
    if main_tag_id is None:
        log.warning("The 'Primary Tag' don't exist ({}), marker won't be created.".format(main_tag))
        return None
    log.info("Creating Marker: {}".format(title))
    query = """
    mutation SceneMarkerCreate($title: String!, $seconds: Float!, $scene_id: ID!, $primary_tag_id: ID!, $tag_ids: [ID!] = []) {
        sceneMarkerCreate(
            input: {
                title: $title
                seconds: $seconds
                scene_id: $scene_id
                primary_tag_id: $primary_tag_id
                tag_ids: $tag_ids
            }
        ) {
            ...SceneMarkerData
        }
    }
    fragment SceneMarkerData on SceneMarker {
        id
        title
        seconds
        stream
        preview
        screenshot
        scene {
            id
        }
        primary_tag {
            id
            name
            aliases
        }
        tags {
            id
            name
            aliases
        }
    }
    """
    variables = {
        "primary_tag_id": main_tag_id,
        "scene_id": scene_id,
        "seconds": seconds,
        "title": title,
        "tag_ids": [] if tags is None else tags
    }
    return callGraphQL(query, variables)


def graphql_getMarker(scene_id):
    """Return the ``seconds`` of every marker on a Stash scene.

    Returns None when the scene cannot be read or has no markers.
    """
    query = """
    query FindScene($id: ID!, $checksum: String) {
        findScene(id: $id, checksum: $checksum) {
            scene_markers {
                seconds
            }
        }
    }
    """
    variables = {
        "id": scene_id
    }
    result = callGraphQL(query, variables)
    scene = (result or {}).get("findScene") or {}
    markers = scene.get("scene_markers")
    if markers:
        return [x.get("seconds") for x in markers]
    return None


def graphql_getScene(scene_id):
    """Return duration and existing marker times of a Stash scene.

    Returns:
        A dict with ``duration`` (duration of the first file, or None when
        the scene has no files) and ``marker`` (list of marker seconds, or
        None when there are none). None when the scene cannot be read.
    """
    query = """
    query FindScene($id: ID!, $checksum: String) {
        findScene(id: $id, checksum: $checksum) {
            files {
                duration
            }
            scene_markers {
                seconds
            }
        }
    }
    """
    variables = {
        "id": scene_id
    }
    result = callGraphQL(query, variables)
    scene = (result or {}).get("findScene")
    if not scene:
        return None
    files = scene.get("files") or []
    markers = scene.get("scene_markers")
    return {
        "duration": files[0].get("duration") if files else None,
        "marker": [x.get("seconds") for x in markers] if markers else None,
    }


def parse_duration_to_seconds(duration):
    """Convert an ``HH:MM:SS`` string into a number of seconds.

    Returns None when ``duration`` is None or is not in that format, so a
    malformed value cannot abort the whole scrape.
    """
    if duration is None:
        return None
    try:
        t = datetime.strptime(duration, "%H:%M:%S")
    except (TypeError, ValueError):
        log.warning(f"Unexpected duration format: {duration!r}")
        return None
    delta = timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
    return delta.seconds


def _durations_allow_markers(stash_duration, api_duration):
    """Tell whether markers may be created given both scene lengths.

    Markers are allowed when duration matching is disabled, when the API
    duration is the 0/1 placeholder and MARKER_DURATION_UNSURE is set, or
    when both durations are known and differ by at most MARKER_SEC_DIFF.
    """
    if not MARKER_DURATION_MATCH:
        return True
    if api_duration in (0, 1) and MARKER_DURATION_UNSURE:
        return True
    if api_duration is None or stash_duration is None:
        return False
    return abs(stash_duration - api_duration) <= MARKER_SEC_DIFF


def process_chapters(scene_id, api_json):
    """Create Stash markers from the chapters of a scraped scene.

    Does nothing unless a scene id, STASH_API and CREATE_MARKER are all set
    and ``api_json`` is a non-empty scene dict. Markers whose time already
    exists on the Stash scene are skipped.

    Args:
        scene_id: id of the Stash scene.
        api_json: scene dict as produced by ``Site.parse_scene`` (reads the
            ``markers`` and ``runLength`` keys).
    """
    if not (scene_id and STASH_API and CREATE_MARKER and api_json):
        return
    markers = api_json.get("markers")
    log.debug(f"Processing markers: {json.dumps(markers)}")
    if not markers:
        log.info("No offical marker for this scene")
        return

    stash_scene_info = graphql_getScene(scene_id)
    if not stash_scene_info:
        log.warning("Unable to read the scene from Stash, markers won't be created.")
        return

    # Keep 0 as a real value: it is one of the "unsure" placeholder durations.
    api_scene_duration = api_json.get("runLength")
    log.debug(f"API Duration: {api_scene_duration}")
    if MARKER_DURATION_MATCH and api_scene_duration is None:
        log.info("No duration given by the API.")
        return

    stash_duration = stash_scene_info.get("duration")
    log.debug("Stash Len: {}| API Len: {}".format(stash_duration, api_scene_duration))
    if not _durations_allow_markers(stash_duration, api_scene_duration):
        log.info("The duration of this scene don't match the duration of stash scene.")
        return

    existing = stash_scene_info.get("marker") or []
    for marker in markers:
        seconds = marker.get("seconds")
        if seconds in existing:
            log.debug("Ignoring marker ({}) because already have with same time.".format(seconds))
            continue
        try:
            # The chapter title is used both as marker title and primary tag name.
            graphql_createMarker(scene_id, marker.get("title"), marker.get("title"), seconds)
        except Exception as err:
            log.error(f"Marker failed to create ({err})")


class Site:
    """One site of the network, addressed through its GraphQL endpoint."""

    def __init__(self, name: str):
        """Derive the site id, API endpoint and home URL from ``name``."""
        self.name = name
        # e.g. "Blacked Raw" -> "BLACKEDRAW"
        self.id = name.replace(' ', '').upper()
        self.api = "https://www." + self.id.lower() + ".com/graphql"
        self.home = "https://www." + self.id.lower() + ".com"
        self.search_count = MAX_SCENES

    def isValidURL(self, url: str):
        """Return True when ``url`` is a ``/videos/<slug>`` URL of this site.

        The host must be exactly ``<id>.com``, optionally prefixed with
        ``www.``.
        """
        if not url:
            return False
        u = url.lower().rstrip("/")
        host = urlparse(u).hostname
        if host is None:
            return False
        # Remove the exact affixes; str.lstrip/rstrip would strip character
        # sets and accept hosts such as "<id>.co".
        if host.startswith("www."):
            host = host[len("www."):]
        if host.endswith(".com"):
            host = host[:-len(".com")]
        if host != self.id.lower():
            return False
        splits = u.split("/")
        return len(splits) >= 4 and splits[-2] == "videos"

    def getSlug(self, url: str):
        """Return the last path segment of ``url``, lower-cased."""
        u = url.lower().rstrip("/")
        return u.split("/")[-1]

    def getScene(self, url: str):
        """Scrape the scene at ``url`` and return it as a scene dict."""
        log.debug(f"Scraping using {self.name} graphql API")
        q = {
            'query': self.getVideoQuery,
            'operationName': "getVideo",
            'variables': {
                "site": self.id,
                "videoSlug": self.getSlug(url)
            }
        }
        r = self.callGraphQL(query=q, referer=url)
        return self.parse_scene(r, url)

    def getSearchResult(self, query: str):
        """Search this site for ``query`` and return a list of scene dicts."""
        log.debug(f"Searching using {self.name} graphql API")
        q = {
            'query': self.getSearchQuery,
            'operationName': "getSearchResults",
            'variables': {
                "site": self.id,
                "query": query,
                "first": self.search_count
            }
        }
        r = self.callGraphQL(query=q, referer=self.home)
        return self.parse_search(r)

    def callGraphQL(self, query: dict, referer: str):
        """POST ``query`` to the site API, retrying on HTTP 403.

        Args:
            query: request body (query, operationName, variables).
            referer: value sent in the Referer header.

        Returns:
            The decoded response on success, ``{}`` once
            MAX_403_REATTEMPTS retries have all been answered with 403,
            and None for an empty query or any other failure.
        """
        headers = {
            "Accept-Encoding": "gzip, deflate",
            "Content-Type": "application/json",
            "Accept": "application/json",
            "Referer": referer,
            "DNT": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0",
        }
        if not query:
            return None

        reattempts = 0
        while True:
            try:
                response = requests.post(self.api, json=query, headers=headers, verify=CHECK_SSL_CERT)
                if response.status_code == 403:
                    log.error("GraphQL query recieved a 403 status response")
                    if reattempts >= MAX_403_REATTEMPTS:
                        log.error("Reached max 403 errors for GraphQL query")
                        return {}
                    # Count the retry, otherwise a persistent 403 loops forever.
                    reattempts += 1
                    log.debug(f"403 Reattempt {reattempts}/{MAX_403_REATTEMPTS}")
                    continue
                if response.status_code != 200:
                    raise ConnectionError(
                        f"GraphQL query failed:{response.status_code} - {response.content}"
                    )
                result = response.json()
                errors = _graphql_errors(result)
                if errors:
                    if not result.get("data"):
                        raise Exception(f"GraphQL error: {errors[0]}")
                    # Data came back alongside errors: keep it, but report them.
                    for error in errors:
                        log.warning(f"GraphQL error (partial response kept): {error}")
                if reattempts > 0:
                    log.debug(f"Successful query after attempt #{reattempts}")
                return result
            except Exception as err:
                log.error(f"GraphqQL query failed {err}")
                return None

    def parse_scene(self, response, url=None):
        """Convert a ``getVideo`` response into a scene dict.

        Args:
            response: decoded API response (may be None or empty).
            url: URL the scene was scraped from; stored under ``url`` when
                given.

        Returns:
            The scene dict, or ``{}`` when the response holds no video.
            The dict also carries ``runLength`` and, when present,
            ``markers``.
        """
        scene = {}
        if response is None or response.get('data') is None:
            return scene
        data = response['data'].get('findOneVideo')
        if not data:
            return scene

        scene['title'] = data.get('title')
        scene['details'] = data.get('description')
        scene['studio'] = {"name": self.name}
        scene['code'] = data.get('videoId')

        directors = data.get("directors")
        if directors is not None:
            scene["director"] = ", ".join(d["name"] for d in directors)

        date = data.get('releaseDate')
        if date:
            scene['date'] = date.split("T")[0]

        scene['performers'] = [{"name": model['name']} for model in data.get('models') or []]

        # Tags win; categories are the fallback when no tags are returned.
        tags = data.get('tags')
        categories = data.get('categories')
        if tags:
            scene['tags'] = [{"name": tag} for tag in tags]
        elif categories:
            scene['tags'] = [{"name": category['name']} for category in categories]
        else:
            scene['tags'] = []

        image = _widest_image_src((data.get('images') or {}).get('poster'))
        if image:
            scene['image'] = image

        if url:
            scene["url"] = url

        scene['runLength'] = parse_duration_to_seconds(data.get("runLength"))
        markers = (data.get('chapters') or {}).get('video')
        if markers:
            scene["markers"] = markers
        return scene

    def parse_search(self, response):
        """Convert a ``getSearchResults`` response into a list of scene dicts.

        Results without a slug are dropped because no URL can be built for
        them. Returns an empty list when the response holds no results.
        """
        search_result = []
        if response is None or response.get('data') is None:
            return search_result
        data = response['data'].get('searchVideos')
        if not data:
            return search_result

        for edge in data.get("edges") or []:
            node = edge.get("node")
            if not node:
                continue
            slug = node.get('slug')
            # search results without a url are useless
            # only add results with a slug present
            if not slug:
                continue
            sc = {
                'title': node.get('title'),
                'details': node.get('description'),
                'url': f"https://www.{self.id.lower()}.com/videos/{slug}",
                'code': node.get('videoId'),
                'studio': {"name": self.name},
            }
            date = node.get('releaseDate')
            if date:
                sc['date'] = date.split("T")[0]
            sc['performers'] = [{"name": model['name']} for model in node.get('modelsSlugged') or []]
            image = _widest_image_src((node.get('images') or {}).get('listing'))
            if image:
                sc['image'] = image
            search_result.append(sc)
        return search_result

    @property
    def length(self):
        """Length of the site id; used to sort longer ids first."""
        return len(self.id)

    getVideoQuery = """
    query getVideo($videoSlug: String, $site: Site) {
        findOneVideo(input: {slug: $videoSlug, site: $site}) {
            title
            description
            releaseDate
            models {
                name
            }
            videoId
            directors {
                name
            }
            images {
                poster {
                    src
                    width
                }
            }
            tags
            categories {
                name
            }
            runLength
            chapters {
                video {
                    title
                    seconds
                }
            }
        }
    }
    """

    getSearchQuery = """
    query getSearchResults($query: String!, $site: Site!, $first: Int) {
        searchVideos(input: { query: $query, site: $site, first: $first }) {
            edges {
                node {
                    description
                    title
                    slug
                    releaseDate
                    modelsSlugged: models {
                        name
                        slugged: slug
                    }
                    videoId
                    images {
                        listing {
                            src
                            width
                        }
                    }
                }
            }
        }
    }
    """


# A list (not a set) so sites are always queried in the same order.
studios = [
    Site('Blacked Raw'),
    Site('Blacked'),
    Site('Deeper'),
    Site('Milfy'),
    Site('Tushy'),
    Site('Tushy Raw'),
    Site('Slayed'),
    Site('Vixen'),
]

frag = json.loads(sys.stdin.read())
search_query = frag.get("name")
url = frag.get("url")
scene_id = frag.get("id")


def check_alternate_urls(site):
    """Return the first entry of the fragment's ``urls`` valid for ``site``.

    Returns None when the fragment has no such URL.
    """
    for u in frag.get("urls") or []:
        if site.isValidURL(u):
            return u
    return None


# sceneByURL
if url:
    for x in studios:
        proper_url = url if x.isValidURL(url) else check_alternate_urls(site=x)
        if proper_url is not None:
            s = x.getScene(proper_url)
            # log.info(f"{json.dumps(s)}")
            process_chapters(scene_id=scene_id, api_json=s)
            # drop unwanted keys from json result
            s.pop('runLength', None)
            s.pop('markers', None)
            print(json.dumps(s))
            sys.exit(0)
    log.error(f"URL: {url} is not supported")
    print("{}")
    sys.exit(1)

# sceneByName
if search_query and "search" in sys.argv:
    search_query = search_query.lower()
    lst = []
    wanted = []

    # Only search on specific site if the studio name is in the search query
    # ('Ariana Vixen Cecilia' will search only on Vixen)
    # if the first character is $, filter will be ignored.
    if search_query[0] != "$":
        # make sure longer matches are filtered first
        studios_sorted = sorted(studios, reverse=True, key=lambda s: s.length)
        stripped_query = search_query
        for x in studios_sorted:
            site_id = x.id.lower()
            if site_id in stripped_query:
                wanted.append(site_id)
                # remove the studio from the search query, so that a shorter
                # id contained in it ("blacked" in "blackedraw") cannot match too
                stripped_query = stripped_query.replace(site_id, "")
        stripped_query = " ".join(stripped_query.split())
        # Keep the original text when the query was nothing but studio names.
        if wanted and stripped_query:
            search_query = stripped_query
    else:
        search_query = search_query[1:]

    if wanted:
        log.info(f"Filter: {wanted} applied")

    log.debug(f"Query: '{search_query}'")

    for x in studios:
        if wanted and x.id.lower() not in wanted:
            log.debug(f"[Filter] ignoring {x.id}")
            continue
        s = x.getSearchResult(search_query)
        # merge all list into one
        if s:
            lst.extend(s)
    # log.debug(f"{json.dumps(lst)}")
    print(json.dumps(lst))
    sys.exit(0)