# vixenNetwork.py — Stash community scraper for the Vixen network of sites.
# (Header reconstructed: the original lines here were web diff-viewer residue,
# not part of the Python source.)
import json
import os
import sys
from urllib.parse import urlparse
from datetime import datetime, timedelta

# to import from a parent directory we need to add that directory to the system path
csd = os.path.dirname(
    os.path.realpath(__file__))  # get current script directory
parent = os.path.dirname(csd)  # parent directory (should be the scrapers one)
sys.path.append(
    parent
)  # add parent dir to sys path so that we can import py_common from there

try:
    import requests
except ModuleNotFoundError:
    print("You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", file=sys.stderr)
    print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", file=sys.stderr)
    sys.exit()

try:
    import py_common.log as log
except ModuleNotFoundError:
    print("You need to download the folder 'py_common' from the community repo (CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr)
    sys.exit()

# Max number of scenes that a site can return for the search.
MAX_SCENES = 6

# Marker
# If you want to create a marker while Scraping.
CREATE_MARKER = False
# Only create marker if the durations match (API vs Stash)
MARKER_DURATION_MATCH = True
# Sometimes the API duration is 0/1, so we can't really know if this matches. True if you want to create anyways
MARKER_DURATION_UNSURE = True
# Max allowed difference (seconds) in scene length between Stash & API.
MARKER_SEC_DIFF = 10

# Tags you don't want to see in the Scraper window.
# NOTE(review): IGNORE_TAGS, FIXED_TAGS, CHECK_SSL_CERT and LOCAL_PATH are not
# referenced anywhere in this file's visible code — confirm whether they are
# dead config or consumed by code outside this view.
IGNORE_TAGS = ["Sex","Feature","HD","Big Dick"]
# Tags you want to add in the Scraper window.
FIXED_TAGS = ""
# Check the SSL Certificate.
CHECK_SSL_CERT = True
# Local folder with JSON inside (Only used if scene isn't found from the API)
LOCAL_PATH = r""

# Base URL of the local Stash server (used for marker creation).
SERVER_IP = "http://localhost:9999"
# API key (Settings > Configuration > Authentication)
STASH_API = ""

# Automatically reattempt GraphQL queries to Vixen sites which fail with a 403 response
MAX_403_REATTEMPTS = 20

SERVER_URL = SERVER_IP + "/graphql"
def callGraphQL(query, variables=None):
    """Send a GraphQL query to the local Stash server.

    Args:
        query: GraphQL query string.
        variables: Optional dict of query variables.

    Returns:
        The response's "data" payload dict, or None on any failure
        (HTTP error, auth error, GraphQL error, network exception).
    """
    headers = {
        "Accept-Encoding": "gzip, deflate, br",
        "Content-Type": "application/json",
        "Accept": "application/json",
        "Connection": "keep-alive",
        "DNT": "1",
        "ApiKey": STASH_API
    }
    # BUG FIX: the request payload was previously named `json`, shadowing the
    # imported `json` module inside this function.
    payload = {'query': query}
    if variables is not None:
        payload['variables'] = variables
    try:
        response = requests.post(SERVER_URL, json=payload, headers=headers)
        if response.status_code == 200:
            result = response.json()
            if result.get("error"):
                for error in result["error"]["errors"]:
                    raise Exception("GraphQL error: {}".format(error))
            if result.get("data"):
                return result.get("data")
        elif response.status_code == 401:
            log.error("[GraphQL] HTTP Error 401, Unauthorised.")
            return None
        else:
            raise ConnectionError("GraphQL query failed:{} - {}".format(response.status_code, response.content))
    except Exception as err:
        log.error(err)
        return None
def graphql_findTagbyName(name):
    """Look up a Stash tag id by name or alias (case-insensitive).

    Args:
        name: Tag name to search for.

    Returns:
        The tag id, or None if no tag matches or the query failed.
    """
    query = """
    query {
        allTags {
            id
            name
            aliases
        }
    }
    """
    result = callGraphQL(query)
    # BUG FIX: callGraphQL returns None on any failure; indexing it raised a
    # TypeError before.
    if result is None:
        return None
    normalized_name = name.lower().strip()
    for tag in result["allTags"]:
        if tag["name"].lower() == normalized_name:
            return tag["id"]
        # `aliases` may be missing or null in the response.
        for alias in tag.get("aliases") or []:
            if alias.lower() == normalized_name:
                return tag["id"]
    return None
def graphql_createMarker(scene_id, title, main_tag, seconds, tags=None):
    """Create a scene marker in Stash.

    Args:
        scene_id: Stash scene id.
        title: Marker title.
        main_tag: Name of the primary tag (must already exist in Stash).
        seconds: Marker position within the scene, in seconds.
        tags: Optional list of extra tag ids (defaults to no extra tags).

    Returns:
        The GraphQL result dict, or None if the primary tag doesn't exist
        or the mutation failed.
    """
    # BUG FIX: the original signature used a mutable default (`tags=[]`),
    # which is shared across calls.
    if tags is None:
        tags = []
    main_tag_id = graphql_findTagbyName(main_tag)
    if main_tag_id is None:
        log.warning("The 'Primary Tag' don't exist ({}), marker won't be created.".format(main_tag))
        return None
    log.info("Creating Marker: {}".format(title))
    query = """
    mutation SceneMarkerCreate($title: String!, $seconds: Float!, $scene_id: ID!, $primary_tag_id: ID!, $tag_ids: [ID!] = []) {
      sceneMarkerCreate(
        input: {
          title: $title
          seconds: $seconds
          scene_id: $scene_id
          primary_tag_id: $primary_tag_id
          tag_ids: $tag_ids
        }
      ) {
        ...SceneMarkerData
      }
    }
    fragment SceneMarkerData on SceneMarker {
      id
      title
      seconds
      stream
      preview
      screenshot
      scene {
        id
      }
      primary_tag {
        id
        name
        aliases
      }
      tags {
        id
        name
        aliases
      }
    }
    """
    variables = {
        "primary_tag_id": main_tag_id,
        "scene_id": scene_id,
        "seconds": seconds,
        "title": title,
        "tag_ids": tags
    }
    result = callGraphQL(query, variables)
    return result
def graphql_getMarker(scene_id):
    """Return the existing marker timestamps (seconds) of a Stash scene.

    Returns a list of seconds values, or None when the scene has no markers
    or the query failed.
    """
    query = """
    query FindScene($id: ID!, $checksum: String) {
        findScene(id: $id, checksum: $checksum) {
            scene_markers {
                seconds
            }
        }
    }
    """
    result = callGraphQL(query, {"id": scene_id})
    if not result:
        return None
    markers = result["findScene"].get("scene_markers")
    if not markers:
        return None
    return [marker.get("seconds") for marker in markers]
def graphql_getScene(scene_id):
    """Fetch a Stash scene's file duration and existing marker times.

    Returns a dict {"duration": float, "marker": list[float] | None},
    or None if the query failed.
    """
    query = """
    query FindScene($id: ID!, $checksum: String) {
        findScene(id: $id, checksum: $checksum) {
            files {
                duration
            }
            scene_markers {
                seconds
            }
        }
    }
    """
    result = callGraphQL(query, {"id": scene_id})
    if not result:
        return None
    found = result["findScene"]
    info = {"duration": found["files"][0]["duration"]}
    markers = found.get("scene_markers")
    info["marker"] = [entry.get("seconds") for entry in markers] if markers else None
    return info
def parse_duration_to_seconds(duration):
    """Convert an "HH:MM:SS" duration string to whole seconds.

    None passes through unchanged; a malformed string raises ValueError
    (from strptime), matching the original behavior.
    """
    if duration is None:
        return None
    parsed = datetime.strptime(duration, "%H:%M:%S")
    span = timedelta(hours=parsed.hour, minutes=parsed.minute, seconds=parsed.second)
    return span.seconds
def process_chapters(scene_id, api_json):
    """Create Stash markers for a scene from the site API's chapter list.

    Does nothing unless a scene id is known, STASH_API is configured,
    CREATE_MARKER is enabled, and the API returned a non-empty scene dict.
    Markers are only created when the API and Stash scene durations agree
    within MARKER_SEC_DIFF (or the API duration is 0/1 and
    MARKER_DURATION_UNSURE allows it).
    """
    if scene_id and STASH_API and CREATE_MARKER and api_json != {}:
        log.debug(f"Processing markers: {json.dumps(api_json.get('markers'))}")
        markers = api_json.get("markers")
        if markers:
            stash_scene_info = graphql_getScene(scene_id)
            api_scene_duration = None
            if api_json.get("runLength"):
                api_scene_duration = api_json.get("runLength")

            log.debug(f"API Duration: {api_scene_duration}")
            if MARKER_DURATION_MATCH and api_scene_duration is None:
                log.info("No duration given by the API.")
            else:
                # NOTE(review): assumes graphql_getScene succeeded; a None
                # result here would raise — confirm scene_id is always valid.
                log.debug("Stash Len: {}| API Len: {}".format(stash_scene_info["duration"], api_scene_duration))
                if (MARKER_DURATION_MATCH and api_scene_duration-MARKER_SEC_DIFF <= stash_scene_info["duration"] <= api_scene_duration+MARKER_SEC_DIFF) or (api_scene_duration in [0,1] and MARKER_DURATION_UNSURE):
                    for marker in markers:
                        # Skip markers whose timestamp already exists in Stash.
                        if stash_scene_info.get("marker"):
                            if marker.get("seconds") in stash_scene_info["marker"]:
                                log.debug("Ignoring marker ({}) because already have with same time.".format(marker.get("seconds")))
                                continue
                        try:
                            # The marker title doubles as its primary tag name.
                            graphql_createMarker(scene_id, marker.get("title"), marker.get("title"), marker.get("seconds"))
                        except:
                            log.error("Marker failed to create")
                else:
                    log.info("The duration of this scene don't match the duration of stash scene.")
        else:
            log.info("No offical marker for this scene")
class Site:
    """One site of the Vixen network (Blacked, Tushy, Vixen, ...).

    Wraps the site's public GraphQL API for scraping a single scene by
    URL slug and for free-text scene search.
    """

    def __init__(self, name: str):
        self.name = name
        # Site id as used by the API, e.g. "Blacked Raw" -> "BLACKEDRAW".
        self.id = name.replace(' ', '').upper()
        self.api = "https://www." + self.id.lower() + ".com/graphql"
        self.home = "https://www." + self.id.lower() + ".com"
        self.search_count = MAX_SCENES

    def isValidURL(self, url: str):
        """Return True if `url` is a scene ("/videos/<slug>") URL on this site."""
        u = url.lower().rstrip("/")
        up = urlparse(u)
        host = up.hostname
        if host is None:
            return False
        # BUG FIX: the original used host.lstrip("www.").rstrip(".com"), but
        # lstrip/rstrip strip *character sets*, not substrings, so hostnames
        # ending in any of ". c o m" were over-trimmed. Trim explicitly.
        if host.startswith("www."):
            host = host[len("www."):]
        if host.endswith(".com"):
            host = host[:-len(".com")]
        if host != self.id.lower():
            return False
        splits = u.split("/")
        if len(splits) < 4:
            return False
        return splits[-2] == "videos"

    def getSlug(self, url: str):
        """Return the final path segment of `url` (the scene slug), lowercased."""
        u = url.lower().rstrip("/")
        slug = u.split("/")[-1]
        return slug

    def getScene(self, url: str):
        """Scrape one scene by URL; returns a Stash scene fragment dict."""
        log.debug(f"Scraping using {self.name} graphql API")
        q = {
            'query': self.getVideoQuery,
            'operationName': "getVideo",
            'variables': {
                "site": self.id,
                "videoSlug": self.getSlug(url)
            }
        }
        r = self.callGraphQL(query=q, referer=url)
        # BUG FIX: pass the scraped URL through explicitly instead of having
        # parse_scene read the module global `url` (which is None when the
        # scene matched one of the fragment's alternate URLs).
        return self.parse_scene(r, url)

    def getSearchResult(self, query: str):
        """Search this site; returns a list of Stash scene fragments."""
        log.debug(f"Searching using {self.name} graphql API")
        q = {
            'query': self.getSearchQuery,
            'operationName': "getSearchResults",
            'variables': {
                "site": self.id,
                "query": query,
                "first": self.search_count
            }
        }
        r = self.callGraphQL(query=q, referer=self.home)
        return self.parse_search(r)

    def callGraphQL(self, query: dict, referer: str):
        """POST `query` to the site's GraphQL endpoint, retrying 403 responses.

        Returns the decoded JSON response on success, {} after exhausting
        403 retries, and None on any other failure.
        """
        headers = {
            "Accept-Encoding": "gzip, deflate",
            "Content-Type": "application/json",
            "Accept": "application/json",
            "Referer": referer,
            "DNT": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0",
        }
        if not query:
            return None

        reattempts = 0
        while True:
            try:
                response = requests.post(self.api, json=query, headers=headers)
                if response.status_code == 200:
                    result = response.json()
                    if result.get("error"):
                        for error in result["error"]["errors"]:
                            raise Exception(f"GraphQL error: {error}")
                    if reattempts > 0:
                        log.debug(f"Successful query after attempt #{reattempts}")
                    return result
                elif response.status_code == 403:
                    log.error("GraphQL query recieved a 403 status response")
                    # BUG FIX: the counter was never incremented, so a
                    # persistent 403 retried forever instead of giving up
                    # after MAX_403_REATTEMPTS.
                    reattempts += 1
                    if reattempts < MAX_403_REATTEMPTS:
                        log.debug(f"403 Reattempt {reattempts}/{MAX_403_REATTEMPTS}")
                    else:
                        log.error("Reached max 403 errors for GraphQL query")
                        return {}
                else:
                    raise ConnectionError(
                        f"GraphQL query failed:{response.status_code} - {response.content}"
                    )
            except Exception as err:
                log.error(f"GraphqQL query failed {err}")
                return None

    def parse_scene(self, response, url=None):
        """Convert a findOneVideo GraphQL response into a Stash scene dict.

        Returns {} when the response is empty/failed, None when the response
        has a data payload but no matching video.
        """
        scene = {}
        if response is None or response.get('data') is None:
            return scene

        data = response['data'].get('findOneVideo')
        if data:
            scene['title'] = data.get('title')
            scene['details'] = data.get('description')
            scene['studio'] = {"name": self.name}
            scene['code'] = data.get('videoId')
            director = data.get("directors")
            if director is not None:
                scene["director"] = ", ".join(d["name"] for d in data.get("directors", []))

            date = data.get('releaseDate')
            if date:
                # API dates look like ISO timestamps; keep the date part only.
                scene['date'] = date.split("T")[0]
            scene['performers'] = []
            if data.get('models'):
                for model in data['models']:
                    scene['performers'].append({"name": model['name']})

            scene['tags'] = []
            tags = data.get('tags')
            categories = data.get('categories')
            # Some sites return an empty tag list but populated categories.
            if tags == [] and categories:
                for tag in data['categories']:
                    scene['tags'].append({"name": tag['name']})
            elif tags:
                for tag in data['tags']:
                    scene['tags'].append({"name": tag})

            if data.get('images'):
                if data['images'].get('poster'):
                    # Keep the widest available poster image.
                    maxWidth = 0
                    for image in data['images']['poster']:
                        if image['width'] > maxWidth:
                            scene['image'] = image['src']
                            maxWidth = image['width']
            # BUG FIX: previously read the module-level global `url`; use the
            # explicit parameter supplied by getScene instead.
            if url:
                scene["url"] = url

            scene['runLength'] = parse_duration_to_seconds(data.get("runLength"))

            # BUG FIX: guard against `chapters` being present but null, which
            # made .get('video') raise AttributeError.
            markers = (data.get('chapters') or {}).get('video')
            if markers:
                scene["markers"] = markers

            return scene
        return None

    def parse_search(self, response):
        """Convert a searchVideos GraphQL response into a list of scene dicts.

        Returns [] when the response is empty/failed, None when the response
        has a data payload but no search results object.
        """
        search_result = []

        if response is None or response.get('data') is None:
            return search_result

        data = response['data'].get('searchVideos')
        if data:
            for scene in data["edges"]:
                scene = scene.get("node")
                if scene:
                    slug = scene.get('slug')
                    # search results without a url are useless
                    # only add results with a slug present
                    if slug:
                        sc = {}
                        sc['title'] = scene.get('title')
                        sc['details'] = scene.get('description')
                        sc['url'] = f"https://www.{self.id.lower()}.com/videos/{slug}"
                        sc['code'] = scene.get('videoId')
                        sc['studio'] = {"name": self.name}
                        date = scene.get('releaseDate')
                        if date:
                            sc['date'] = date.split("T")[0]
                        sc['performers'] = []
                        if scene.get('modelsSlugged'):
                            for model in scene['modelsSlugged']:
                                sc['performers'].append(
                                    {"name": model['name']})
                        if scene.get('images'):
                            if scene['images'].get('listing'):
                                # Keep the widest available listing image.
                                maxWidth = 0
                                for image in scene['images']['listing']:
                                    if image['width'] > maxWidth:
                                        sc['image'] = image['src']
                                        maxWidth = image['width']
                        search_result.append(sc)
            return search_result
        return None

    @property
    def length(self):
        # Used to sort studios so longer names are matched/filtered first.
        return len(self.id)

    getVideoQuery = """
    query getVideo($videoSlug: String, $site: Site) {
        findOneVideo(input: {slug: $videoSlug, site: $site}) {
            title
            description
            releaseDate
            models {
                name
            }
            videoId
            directors {
                name
            }
            images {
                poster {
                    src
                    width
                }
            }
            tags
            categories {
                name
            }
            runLength
            chapters {
                video {
                    title
                    seconds
                }
            }
        }
    }
    """
    getSearchQuery = """
    query getSearchResults($query: String!, $site: Site!, $first: Int) {
        searchVideos(input: { query: $query, site: $site, first: $first }) {
            edges {
                node {
                    description
                    title
                    slug
                    releaseDate
                    modelsSlugged: models {
                        name
                        slugged: slug
                    }
                    videoId
                    images {
                        listing {
                            src
                            width
                        }
                    }
                }
            }
        }
    }
    """
# All supported Vixen-network sites.
studios = {
    Site('Blacked Raw'),
    Site('Blacked'),
    Site('Deeper'),
    Site('Milfy'),
    Site('Tushy'),
    Site('Tushy Raw'),
    Site('Slayed'),
    Site('Vixen')
}

# Stash passes the scraper input fragment as JSON on stdin.
frag = json.loads(sys.stdin.read())
search_query = frag.get("name")  # scene name (sceneByName)
url = frag.get("url")            # scene URL (sceneByURL)
scene_id = frag.get("id")        # Stash scene id, used for marker creation
def check_alternate_urls(site):
    """Return the first URL in the input fragment's alternate-url list that
    belongs to `site`, or None if none match."""
    candidates = frag.get("urls", [])
    return next((candidate for candidate in candidates if site.isValidURL(candidate)), None)
#sceneByURL
# Find the studio that owns the URL (or one of the fragment's alternate
# URLs), scrape it, optionally create markers, and print the scene JSON.
if url:
    for x in studios:
        proper_url = None
        if x.isValidURL(url):
            proper_url = url
        else:
            # The fragment may carry alternate URLs; try those too.
            proper_url = check_alternate_urls(site=x)

        if proper_url != None:
            s = x.getScene(proper_url)
            # log.info(f"{json.dumps(s)}")
            process_chapters(scene_id=scene_id, api_json=s)

            # drop unwanted keys from json result
            s.pop('runLength', None)
            s.pop('markers', None)

            print(json.dumps(s))
            sys.exit(0)
    # No studio claimed the URL.
    log.error(f"URL: {url} is not supported")
    print("{}")
    sys.exit(1)
#sceneByName
# Search across all (or studio-filtered) sites and print the merged results.
if search_query and "search" in sys.argv:
    search_query = search_query.lower()
    lst = []
    wanted = []

    # Only search on specific site if the studio name is in the search query
    # ('Ariana Vixen Cecilia' will search only on Vixen)

    # if the first character is $, filter will be ignored.
    if search_query[0] != "$":
        # make sure longer matches are filtered first
        studios_sorted = sorted(studios, reverse=True, key=lambda s: s.length)
        for x in studios_sorted:
            if x.id.lower() in search_query:
                wanted.append(x.id.lower())
                continue
            # remove the studio from the search result
            # NOTE(review): because of the `continue` above, this replace only
            # runs when the studio name is NOT in the query, so it is a no-op;
            # matched studio names stay in the query — confirm intent.
            search_query = search_query.replace(x.id.lower(), "")
    else:
        search_query = search_query[1:]

    if wanted:
        log.info(f"Filter: {wanted} applied")

    log.debug(f"Query: '{search_query}'")

    for x in studios:
        if wanted:
            if x.id.lower() not in wanted:
                log.debug(f"[Filter] ignoring {x.id}")
                continue
        s = x.getSearchResult(search_query)
        # merge all list into one
        if s:
            lst.extend(s)
    #log.debug(f"{json.dumps(lst)}")
    print(json.dumps(lst))
    sys.exit(0)
# (End of file — trailing diff-viewer residue removed.)