579 lines
19 KiB
Python
579 lines
19 KiB
Python
import json
|
|
import os
|
|
import sys
|
|
from urllib.parse import urlparse
|
|
from datetime import datetime, timedelta
|
|
|
|
# to import from a parent directory we need to add that directory to the system path
|
|
csd = os.path.dirname(
|
|
os.path.realpath(__file__)) # get current script directory
|
|
parent = os.path.dirname(csd) # parent directory (should be the scrapers one)
|
|
sys.path.append(
|
|
parent
|
|
) # add parent dir to sys path so that we can import py_common from there
|
|
|
|
try:
|
|
import requests
|
|
except ModuleNotFoundError:
|
|
print("You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", file=sys.stderr)
|
|
print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", file=sys.stderr)
|
|
sys.exit()
|
|
|
|
try:
|
|
import py_common.log as log
|
|
except ModuleNotFoundError:
|
|
print("You need to download the folder 'py_common' from the community repo (CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr)
|
|
sys.exit()
|
|
|
|
# Max number of scenes that a site can return for the search.
|
|
MAX_SCENES = 6
|
|
|
|
# Marker
|
|
# If you want to create a marker while Scraping.
|
|
CREATE_MARKER = False
|
|
# Only create marker if the durations match (API vs Stash)
|
|
MARKER_DURATION_MATCH = True
|
|
# Sometimes the API duration is 0/1, so we can't really know if this matches. True if you want to create anyways
|
|
MARKER_DURATION_UNSURE = True
|
|
# Max allowed difference (seconds) in scene length between Stash & API.
|
|
MARKER_SEC_DIFF = 10
|
|
|
|
# Tags you don't want to see in the Scraper window.
|
|
IGNORE_TAGS = ["Sex","Feature","HD","Big Dick"]
|
|
# Tags you want to add in the Scraper window.
|
|
FIXED_TAGS = ""
|
|
# Check the SSL Certificate.
|
|
CHECK_SSL_CERT = True
|
|
# Local folder with JSON inside (Only used if scene isn't found from the API)
|
|
LOCAL_PATH = r""
|
|
|
|
SERVER_IP = "http://localhost:9999"
|
|
# API key (Settings > Configuration > Authentication)
|
|
STASH_API = ""
|
|
|
|
# Automatically reattempt GraphQL queries to Vixen sites which fail with a 403 response
|
|
MAX_403_REATTEMPTS = 20
|
|
|
|
SERVER_URL = SERVER_IP + "/graphql"
|
|
|
|
def callGraphQL(query, variables=None):
    """POST a GraphQL query to the local Stash server (SERVER_URL).

    :param query: GraphQL query string.
    :param variables: optional dict of query variables.
    :return: the response's "data" payload on success, None on any failure
             (HTTP error, GraphQL error, connection problem).
    """
    headers = {
        "Accept-Encoding": "gzip, deflate, br",
        "Content-Type": "application/json",
        "Accept": "application/json",
        "Connection": "keep-alive",
        "DNT": "1",
        "ApiKey": STASH_API
    }
    # Renamed from `json` to `payload`: the original shadowed the imported
    # json module inside this function.
    payload = {'query': query}
    if variables is not None:
        payload['variables'] = variables
    try:
        response = requests.post(SERVER_URL, json=payload, headers=headers)
        if response.status_code == 200:
            result = response.json()
            if result.get("error"):
                for error in result["error"]["errors"]:
                    raise Exception("GraphQL error: {}".format(error))
            if result.get("data"):
                return result.get("data")
        elif response.status_code == 401:
            log.error("[GraphQL] HTTP Error 401, Unauthorised.")
            return None
        else:
            raise ConnectionError("GraphQL query failed:{} - {}".format(response.status_code, response.content))
    except Exception as err:
        log.error(err)
        return None
|
|
|
|
def graphql_findTagbyName(name):
    """Look up a Stash tag id by name or alias (case-insensitive).

    :param name: tag name to search for.
    :return: the tag id, or None when not found or the query failed.
    """
    query = """
query {
allTags {
id
name
aliases
}
}
"""
    result = callGraphQL(query)
    # BUG FIX: callGraphQL returns None on failure; the original then crashed
    # with TypeError on result["allTags"].
    if result is None:
        return None
    normalized_name = name.lower().strip()
    for tag in result["allTags"]:
        if tag["name"].lower() == normalized_name:
            return tag["id"]
        if tag.get("aliases"):
            for alias in tag["aliases"]:
                if alias.lower() == normalized_name:
                    return tag["id"]
    return None
|
|
|
|
def graphql_createMarker(scene_id, title, main_tag, seconds, tags=None):
    """Create a scene marker in Stash.

    :param scene_id: Stash scene id the marker belongs to.
    :param title: marker title.
    :param main_tag: name of the primary tag (must already exist in Stash).
    :param seconds: marker position in seconds.
    :param tags: optional list of extra tag ids (default: no extra tags).
    :return: the GraphQL mutation result, or None when the primary tag
             doesn't exist or the mutation failed.
    """
    # BUG FIX: the original used a mutable default argument (tags=[]),
    # which is shared across calls.
    if tags is None:
        tags = []
    main_tag_id = graphql_findTagbyName(main_tag)
    if main_tag_id is None:
        log.warning("The 'Primary Tag' don't exist ({}), marker won't be created.".format(main_tag))
        return None
    log.info("Creating Marker: {}".format(title))
    query = """
mutation SceneMarkerCreate($title: String!, $seconds: Float!, $scene_id: ID!, $primary_tag_id: ID!, $tag_ids: [ID!] = []) {
sceneMarkerCreate(
input: {
title: $title
seconds: $seconds
scene_id: $scene_id
primary_tag_id: $primary_tag_id
tag_ids: $tag_ids
}
) {
...SceneMarkerData
}
}
fragment SceneMarkerData on SceneMarker {
id
title
seconds
stream
preview
screenshot
scene {
id
}
primary_tag {
id
name
aliases
}
tags {
id
name
aliases
}
}
"""
    variables = {
        "primary_tag_id": main_tag_id,
        "scene_id": scene_id,
        "seconds": seconds,
        "title": title,
        "tag_ids": tags
    }
    result = callGraphQL(query, variables)
    return result
|
|
|
|
def graphql_getMarker(scene_id):
    """Return the existing marker timestamps (seconds) of a Stash scene.

    :param scene_id: Stash scene id.
    :return: list of seconds values, or None when the scene has no markers
             or the query failed.
    """
    query = """
query FindScene($id: ID!, $checksum: String) {
findScene(id: $id, checksum: $checksum) {
scene_markers {
seconds
}
}
}
"""
    result = callGraphQL(query, {"id": scene_id})
    if not result:
        return None
    existing_markers = result["findScene"].get("scene_markers")
    if not existing_markers:
        return None
    return [marker.get("seconds") for marker in existing_markers]
|
|
|
|
def graphql_getScene(scene_id):
    """Fetch a Stash scene's file duration and existing marker timestamps.

    :param scene_id: Stash scene id.
    :return: dict with keys "duration" (float) and "marker" (list of seconds
             or None), or None when the query failed.
    """
    query = """
query FindScene($id: ID!, $checksum: String) {
findScene(id: $id, checksum: $checksum) {
files {
duration
}
scene_markers {
seconds
}
}
}
"""
    result = callGraphQL(query, {"id": scene_id})
    if not result:
        return None
    scene = result["findScene"]
    scene_info = {"duration": scene["files"][0]["duration"]}
    existing_markers = scene.get("scene_markers")
    if existing_markers:
        scene_info["marker"] = [marker.get("seconds") for marker in existing_markers]
    else:
        scene_info["marker"] = None
    return scene_info
|
|
|
|
def parse_duration_to_seconds(duration):
    """Convert an "HH:MM:SS" duration string into a number of seconds.

    :param duration: duration string in "%H:%M:%S" format, or None.
    :return: total seconds as an int, or None when duration is None or
             malformed.
    """
    if duration is None:
        return None
    try:
        t = datetime.strptime(duration, "%H:%M:%S")
    except ValueError:
        # BUG FIX: the original let ValueError propagate, aborting the whole
        # scrape when the API returned an unexpected runLength format.
        return None
    delta = timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
    return delta.seconds
|
|
|
|
def process_chapters(scene_id, api_json):
    """Create Stash markers from the API's chapter list.

    Honors the CREATE_MARKER / MARKER_DURATION_* settings; skips markers that
    already exist at the same timestamp. No-op unless marker creation is
    enabled and both a scene id and a Stash API key are configured.

    :param scene_id: Stash scene id (may be None when not scraping a scene).
    :param api_json: scene dict from parse_scene (keys "markers", "runLength").
    """
    if not (scene_id and STASH_API and CREATE_MARKER and api_json != {}):
        return
    log.debug(f"Processing markers: {json.dumps(api_json.get('markers'))}")
    markers = api_json.get("markers")
    if not markers:
        log.info("No offical marker for this scene")
        return
    stash_scene_info = graphql_getScene(scene_id)
    # BUG FIX: graphql_getScene returns None on failure; the original crashed
    # with TypeError on stash_scene_info["duration"] below.
    if stash_scene_info is None:
        log.error("Could not get scene info from Stash, markers won't be created.")
        return
    api_scene_duration = None
    if api_json.get("runLength"):
        api_scene_duration = api_json.get("runLength")

    log.debug(f"API Duration: {api_scene_duration}")
    if MARKER_DURATION_MATCH and api_scene_duration is None:
        log.info("No duration given by the API.")
        return
    log.debug("Stash Len: {}| API Len: {}".format(stash_scene_info["duration"], api_scene_duration))
    duration_matches = (
        MARKER_DURATION_MATCH
        and api_scene_duration - MARKER_SEC_DIFF <= stash_scene_info["duration"] <= api_scene_duration + MARKER_SEC_DIFF
    ) or (api_scene_duration in [0, 1] and MARKER_DURATION_UNSURE)
    if not duration_matches:
        log.info("The duration of this scene don't match the duration of stash scene.")
        return
    for marker in markers:
        if stash_scene_info.get("marker"):
            if marker.get("seconds") in stash_scene_info["marker"]:
                log.debug("Ignoring marker ({}) because already have with same time.".format(marker.get("seconds")))
                continue
        try:
            graphql_createMarker(scene_id, marker.get("title"), marker.get("title"), marker.get("seconds"))
        except Exception:
            # was a bare `except:` — narrowed so SystemExit/KeyboardInterrupt
            # are not swallowed
            log.error("Marker failed to create")
|
|
|
|
class Site:
|
|
|
|
def __init__(self, name: str):
|
|
self.name = name
|
|
self.id = name.replace(' ', '').upper()
|
|
self.api = "https://www." + self.id.lower() + ".com/graphql"
|
|
self.home = "https://www." + self.id.lower() + ".com"
|
|
self.search_count = MAX_SCENES
|
|
|
|
def isValidURL(self, url: str):
|
|
u = url.lower().rstrip("/")
|
|
up = urlparse(u)
|
|
if up.hostname is None:
|
|
return False
|
|
if up.hostname.lstrip("www.").rstrip(".com") == self.id.lower():
|
|
splits = u.split("/")
|
|
if len(splits) < 4:
|
|
return False
|
|
if splits[-2] == "videos":
|
|
return True
|
|
return False
|
|
|
|
def getSlug(self, url: str):
|
|
u = url.lower().rstrip("/")
|
|
slug = u.split("/")[-1]
|
|
return slug
|
|
|
|
def getScene(self, url: str):
|
|
log.debug(f"Scraping using {self.name} graphql API")
|
|
q = {
|
|
'query': self.getVideoQuery,
|
|
'operationName': "getVideo",
|
|
'variables': {
|
|
"site": self.id,
|
|
"videoSlug": self.getSlug(url)
|
|
}
|
|
}
|
|
r = self.callGraphQL(query=q, referer=url)
|
|
return self.parse_scene(r)
|
|
|
|
def getSearchResult(self, query: str):
|
|
log.debug(f"Searching using {self.name} graphql API")
|
|
q = {
|
|
'query': self.getSearchQuery,
|
|
'operationName': "getSearchResults",
|
|
'variables': {
|
|
"site": self.id,
|
|
"query": query,
|
|
"first": self.search_count
|
|
}
|
|
}
|
|
r = self.callGraphQL(query=q, referer=self.home)
|
|
return self.parse_search(r)
|
|
|
|
def callGraphQL(self, query: dict, referer: str):
|
|
headers = {
|
|
"Accept-Encoding": "gzip, deflate",
|
|
"Content-Type": "application/json",
|
|
"Accept": "application/json",
|
|
"Referer": referer,
|
|
"DNT": "1",
|
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0",
|
|
}
|
|
if not query:
|
|
return None
|
|
|
|
reattempts = 0
|
|
while True:
|
|
try:
|
|
response = requests.post(self.api, json=query, headers=headers)
|
|
if response.status_code == 200:
|
|
result = response.json()
|
|
if result.get("error"):
|
|
for error in result["error"]["errors"]:
|
|
raise Exception(f"GraphQL error: {error}")
|
|
if reattempts > 0:
|
|
log.debug(f"Successful query after attempt #{reattempts}")
|
|
return result
|
|
elif response.status_code == 403:
|
|
log.error("GraphQL query recieved a 403 status response")
|
|
if reattempts < MAX_403_REATTEMPTS:
|
|
log.debug(f"403 Reattempt {reattempts}/{MAX_403_REATTEMPTS}")
|
|
else:
|
|
log.error(f"Reached max 403 errors for GraphQL query")
|
|
return {}
|
|
else:
|
|
raise ConnectionError(
|
|
f"GraphQL query failed:{response.status_code} - {response.content}"
|
|
)
|
|
except Exception as err:
|
|
log.error(f"GraphqQL query failed {err}")
|
|
return None
|
|
|
|
def parse_scene(self, response):
|
|
scene = {}
|
|
if response is None or response.get('data') is None:
|
|
return scene
|
|
|
|
data = response['data'].get('findOneVideo')
|
|
if data:
|
|
scene['title'] = data.get('title')
|
|
scene['details'] = data.get('description')
|
|
scene['studio'] = {"name": self.name}
|
|
scene['code'] = data.get('videoId')
|
|
director = data.get("directors")
|
|
if director is not None:
|
|
scene["director"] = ", ".join(d["name"] for d in data.get("directors", []))
|
|
|
|
date = data.get('releaseDate')
|
|
if date:
|
|
scene['date'] = date.split("T")[0]
|
|
scene['performers'] = []
|
|
if data.get('models'):
|
|
for model in data['models']:
|
|
scene['performers'].append({"name": model['name']})
|
|
|
|
scene['tags'] = []
|
|
tags = data.get('tags')
|
|
categories = data.get('categories')
|
|
if tags == [] and categories:
|
|
for tag in data['categories']:
|
|
scene['tags'].append({"name": tag['name']})
|
|
elif tags:
|
|
for tag in data['tags']:
|
|
scene['tags'].append({"name": tag})
|
|
|
|
if data.get('images'):
|
|
if data['images'].get('poster'):
|
|
maxWidth = 0
|
|
for image in data['images']['poster']:
|
|
if image['width'] > maxWidth:
|
|
scene['image'] = image['src']
|
|
maxWidth = image['width']
|
|
if url:
|
|
scene["url"] = url
|
|
|
|
scene['runLength'] = parse_duration_to_seconds(data.get("runLength"))
|
|
|
|
markers = data.get('chapters', {}).get('video')
|
|
if markers:
|
|
scene["markers"] = markers
|
|
|
|
return scene
|
|
return None
|
|
|
|
def parse_search(self, response):
|
|
search_result = []
|
|
|
|
if response is None or response.get('data') is None:
|
|
return search_result
|
|
|
|
data = response['data'].get('searchVideos')
|
|
if data:
|
|
for scene in data["edges"]:
|
|
scene = scene.get("node")
|
|
if scene:
|
|
slug = scene.get('slug')
|
|
# search results without a url are useless
|
|
# only add results with a slug present
|
|
if slug:
|
|
sc = {}
|
|
sc['title'] = scene.get('title')
|
|
sc['details'] = scene.get('description')
|
|
sc['url'] = f"https://www.{self.id.lower()}.com/videos/{slug}"
|
|
sc['code'] = scene.get('videoId')
|
|
sc['studio'] = {"name": self.name}
|
|
date = scene.get('releaseDate')
|
|
if date:
|
|
sc['date'] = date.split("T")[0]
|
|
sc['performers'] = []
|
|
if scene.get('modelsSlugged'):
|
|
for model in scene['modelsSlugged']:
|
|
sc['performers'].append(
|
|
{"name": model['name']})
|
|
if scene.get('images'):
|
|
if scene['images'].get('listing'):
|
|
maxWidth = 0
|
|
for image in scene['images']['listing']:
|
|
if image['width'] > maxWidth:
|
|
sc['image'] = image['src']
|
|
maxWidth = image['width']
|
|
search_result.append(sc)
|
|
return search_result
|
|
return None
|
|
|
|
@property
|
|
def length(self):
|
|
return len(self.id)
|
|
|
|
getVideoQuery = """
|
|
query getVideo($videoSlug: String, $site: Site) {
|
|
findOneVideo(input: {slug: $videoSlug, site: $site}) {
|
|
title
|
|
description
|
|
releaseDate
|
|
models {
|
|
name
|
|
}
|
|
videoId
|
|
directors {
|
|
name
|
|
}
|
|
images {
|
|
poster {
|
|
src
|
|
width
|
|
}
|
|
}
|
|
tags
|
|
categories {
|
|
name
|
|
}
|
|
runLength
|
|
chapters {
|
|
video {
|
|
title
|
|
seconds
|
|
}
|
|
}
|
|
}
|
|
}
|
|
"""
|
|
getSearchQuery = """
|
|
query getSearchResults($query: String!, $site: Site!, $first: Int) {
|
|
searchVideos(input: { query: $query, site: $site, first: $first }) {
|
|
edges {
|
|
node {
|
|
description
|
|
title
|
|
slug
|
|
releaseDate
|
|
modelsSlugged: models {
|
|
name
|
|
slugged: slug
|
|
}
|
|
videoId
|
|
images {
|
|
listing {
|
|
src
|
|
width
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
"""
|
|
|
|
|
|
# All supported Vixen-network studios.
studios = {
    Site(studio_name)
    for studio_name in (
        'Blacked Raw',
        'Blacked',
        'Deeper',
        'Milfy',
        'Tushy',
        'Tushy Raw',
        'Slayed',
        'Vixen',
    )
}
|
|
|
|
# Read the scene fragment Stash passes on stdin.
frag = json.loads(sys.stdin.read())

search_query = frag.get("name")  # scene title, used for name-based search
url = frag.get("url")  # primary scene URL, used for URL-based scraping
scene_id = frag.get("id")  # Stash scene id, needed for marker creation
|
|
|
|
def check_alternate_urls(site):
    """Return the first URL from the fragment's `urls` list that belongs to
    `site`, or None when no alternate URL matches."""
    return next(
        (candidate for candidate in frag.get("urls", []) if site.isValidURL(candidate)),
        None,
    )
|
|
|
|
#sceneByURL
if url:
    for x in studios:
        # prefer the primary URL; otherwise look through the alternate URLs
        if x.isValidURL(url):
            proper_url = url
        else:
            proper_url = check_alternate_urls(site=x)

        if proper_url is not None:
            s = x.getScene(proper_url)
            # BUG FIX: getScene can return None (scene not found by the API);
            # the original then crashed on s.pop(). Normalize to {} so an
            # empty result is printed instead.
            if s is None:
                s = {}
            # log.info(f"{json.dumps(s)}")
            process_chapters(scene_id=scene_id, api_json=s)

            # drop unwanted keys from json result
            s.pop('runLength', None)
            s.pop('markers', None)

            print(json.dumps(s))
            sys.exit(0)
    log.error(f"URL: {url} is not supported")
    print("{}")
    sys.exit(1)
|
|
|
|
#sceneByName
if search_query and "search" in sys.argv:
    search_query = search_query.lower()
    lst = []
    wanted = []

    # Only search on specific site if the studio name is in the search query
    # ('Ariana Vixen Cecilia' will search only on Vixen)

    # if the first character is $, filter will be ignored.
    if search_query[0] != "$":
        # make sure longer matches are filtered first
        studios_sorted = sorted(studios, reverse=True, key=lambda s: s.length)
        for x in studios_sorted:
            if x.id.lower() in search_query:
                wanted.append(x.id.lower())
                # remove the studio name from the query so it isn't sent to
                # the API. BUG FIX: in the original this replace() was dead
                # code (placed behind a `continue`), so the studio name was
                # never actually stripped despite the comment stating that
                # intent.
                search_query = search_query.replace(x.id.lower(), "")
    else:
        search_query = search_query[1:]

    if wanted:
        log.info(f"Filter: {wanted} applied")

    log.debug(f"Query: '{search_query}'")

    for x in studios:
        if wanted and x.id.lower() not in wanted:
            log.debug(f"[Filter] ignoring {x.id}")
            continue
        s = x.getSearchResult(search_query)
        # merge all list into one
        if s:
            lst.extend(s)
    #log.debug(f"{json.dumps(lst)}")
    print(json.dumps(lst))
    sys.exit(0)
|