This commit is contained in:
Christoph Califice
2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions


@@ -0,0 +1,10 @@
id: vixenNetwork
name: Vixen Media Group
metadata: {}
version: 3eb4bdf
date: "2024-01-07 03:22:54"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- vixenNetwork.yml
- vixenNetwork.py


@@ -0,0 +1,578 @@
import json
import os
import sys
from urllib.parse import urlparse
from datetime import datetime, timedelta
# To import from a parent directory we need to add that directory to the system path
csd = os.path.dirname(os.path.realpath(__file__))  # current script directory
parent = os.path.dirname(csd)  # parent directory (should be the scrapers one)
sys.path.append(parent)  # add parent dir to sys.path so we can import py_common
try:
import requests
except ModuleNotFoundError:
print("You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", file=sys.stderr)
print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", file=sys.stderr)
sys.exit()
try:
import py_common.log as log
except ModuleNotFoundError:
print("You need to download the folder 'py_common' from the community repo (CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr)
sys.exit()
# Max number of scenes that a site can return for the search.
MAX_SCENES = 6
# Marker
# Set to True to create markers while scraping.
CREATE_MARKER = False
# Only create marker if the durations match (API vs Stash)
MARKER_DURATION_MATCH = True
# Sometimes the API duration is 0/1, so we can't reliably tell whether it matches. Set to True to create markers anyway.
MARKER_DURATION_UNSURE = True
# Max allowed difference (seconds) in scene length between Stash & API.
MARKER_SEC_DIFF = 10
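# Example: with MARKER_SEC_DIFF = 10, an API duration of 1800s matches Stash
# durations between 1790s and 1810s (inclusive).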
# Tags you don't want to see in the Scraper window.
IGNORE_TAGS = ["Sex", "Feature", "HD", "Big Dick"]
# Tags you want to add in the Scraper window.
FIXED_TAGS = ""
# Check the SSL Certificate.
CHECK_SSL_CERT = True
# Local folder with JSON inside (Only used if scene isn't found from the API)
LOCAL_PATH = r""
SERVER_IP = "http://localhost:9999"
# API key (Settings > Configuration > Authentication)
STASH_API = ""
# Automatically reattempt GraphQL queries to Vixen sites which fail with a 403 response
MAX_403_REATTEMPTS = 20
SERVER_URL = SERVER_IP + "/graphql"
def callGraphQL(query, variables=None):
headers = {
"Accept-Encoding": "gzip, deflate, br",
"Content-Type": "application/json",
"Accept": "application/json",
"Connection": "keep-alive",
"DNT": "1",
"ApiKey": STASH_API
}
    # use a distinct name so the json module isn't shadowed
    payload = {'query': query}
    if variables is not None:
        payload['variables'] = variables
    try:
        response = requests.post(SERVER_URL, json=payload, headers=headers)
if response.status_code == 200:
result = response.json()
if result.get("error"):
for error in result["error"]["errors"]:
raise Exception("GraphQL error: {}".format(error))
if result.get("data"):
return result.get("data")
elif response.status_code == 401:
log.error("[GraphQL] HTTP Error 401, Unauthorised.")
return None
else:
raise ConnectionError("GraphQL query failed:{} - {}".format(response.status_code, response.content))
except Exception as err:
log.error(err)
return None
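# Example (illustrative; assumes a reachable Stash instance and, if
# authentication is enabled, a valid STASH_API key):
#   data = callGraphQL("query { allTags { id name } }")
# returns the GraphQL "data" object on success, or None on any error.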
def graphql_findTagbyName(name):
query = """
query {
allTags {
id
name
aliases
}
}
"""
    result = callGraphQL(query)
    if result is None:
        return None
    normalized_name = name.lower().strip()
    for tag in result["allTags"]:
if tag["name"].lower() == normalized_name:
return tag["id"]
if tag.get("aliases"):
for alias in tag["aliases"]:
if alias.lower() == normalized_name:
return tag["id"]
return None
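# Example: graphql_findTagbyName("Anal Sex") returns the matching tag's ID when
# "anal sex" equals a tag name or alias (case-insensitively), otherwise None.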
def graphql_createMarker(scene_id, title, main_tag, seconds, tags=None):
    # avoid a mutable default argument
    tags = tags or []
    main_tag_id = graphql_findTagbyName(main_tag)
if main_tag_id is None:
log.warning("The 'Primary Tag' don't exist ({}), marker won't be created.".format(main_tag))
return None
log.info("Creating Marker: {}".format(title))
query = """
mutation SceneMarkerCreate($title: String!, $seconds: Float!, $scene_id: ID!, $primary_tag_id: ID!, $tag_ids: [ID!] = []) {
sceneMarkerCreate(
input: {
title: $title
seconds: $seconds
scene_id: $scene_id
primary_tag_id: $primary_tag_id
tag_ids: $tag_ids
}
) {
...SceneMarkerData
}
}
fragment SceneMarkerData on SceneMarker {
id
title
seconds
stream
preview
screenshot
scene {
id
}
primary_tag {
id
name
aliases
}
tags {
id
name
aliases
}
}
"""
variables = {
"primary_tag_id": main_tag_id,
"scene_id": scene_id,
"seconds": seconds,
"title": title,
"tag_ids": tags
}
result = callGraphQL(query, variables)
return result
def graphql_getMarker(scene_id):
query = """
query FindScene($id: ID!, $checksum: String) {
findScene(id: $id, checksum: $checksum) {
scene_markers {
seconds
}
}
}
"""
variables = {
"id": scene_id
}
result = callGraphQL(query, variables)
if result:
if result["findScene"].get("scene_markers"):
return [x.get("seconds") for x in result["findScene"]["scene_markers"]]
return None
def graphql_getScene(scene_id):
query = """
query FindScene($id: ID!, $checksum: String) {
findScene(id: $id, checksum: $checksum) {
files {
duration
}
scene_markers {
seconds
}
}
}
"""
variables = {
"id": scene_id
}
result = callGraphQL(query, variables)
if result:
return_dict = {}
return_dict["duration"] = result["findScene"]["files"][0]["duration"]
if result["findScene"].get("scene_markers"):
return_dict["marker"] = [x.get("seconds") for x in result["findScene"]["scene_markers"]]
else:
return_dict["marker"] = None
return return_dict
return None
def parse_duration_to_seconds(duration):
if duration is None:
return None
    t = datetime.strptime(duration, "%H:%M:%S")
delta = timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
return delta.seconds
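# Example: parse_duration_to_seconds("00:41:05") == 2465
# (delta.seconds is used, so durations are assumed to be under 24 hours)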
def process_chapters(scene_id, api_json):
if scene_id and STASH_API and CREATE_MARKER and api_json != {}:
log.debug(f"Processing markers: {json.dumps(api_json.get('markers'))}")
markers = api_json.get("markers")
if markers:
stash_scene_info = graphql_getScene(scene_id)
api_scene_duration = None
if api_json.get("runLength"):
api_scene_duration = api_json.get("runLength")
log.debug(f"API Duration: {api_scene_duration}")
if MARKER_DURATION_MATCH and api_scene_duration is None:
log.info("No duration given by the API.")
else:
log.debug("Stash Len: {}| API Len: {}".format(stash_scene_info["duration"], api_scene_duration))
                duration_ok = (api_scene_duration - MARKER_SEC_DIFF
                               <= stash_scene_info["duration"]
                               <= api_scene_duration + MARKER_SEC_DIFF)
                if (MARKER_DURATION_MATCH and duration_ok) or (api_scene_duration in [0, 1] and MARKER_DURATION_UNSURE):
for marker in markers:
if stash_scene_info.get("marker"):
if marker.get("seconds") in stash_scene_info["marker"]:
log.debug("Ignoring marker ({}) because already have with same time.".format(marker.get("seconds")))
continue
try:
graphql_createMarker(scene_id, marker.get("title"), marker.get("title"), marker.get("seconds"))
                        except Exception as err:
                            log.error(f"Marker creation failed: {err}")
else:
log.info("The duration of this scene don't match the duration of stash scene.")
else:
log.info("No offical marker for this scene")
class Site:
def __init__(self, name: str):
self.name = name
self.id = name.replace(' ', '').upper()
self.api = "https://www." + self.id.lower() + ".com/graphql"
self.home = "https://www." + self.id.lower() + ".com"
self.search_count = MAX_SCENES
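        # Example: Site('Blacked Raw') yields id "BLACKEDRAW" and
        # api "https://www.blackedraw.com/graphql"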
def isValidURL(self, url: str):
u = url.lower().rstrip("/")
up = urlparse(u)
if up.hostname is None:
return False
        # str.lstrip/rstrip strip character *sets*, not prefixes/suffixes,
        # so trim "www." and ".com" explicitly
        host = up.hostname
        if host.startswith("www."):
            host = host[4:]
        if host.endswith(".com"):
            host = host[:-4]
        if host == self.id.lower():
splits = u.split("/")
if len(splits) < 4:
return False
if splits[-2] == "videos":
return True
return False
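        # Example: isValidURL("https://www.blacked.com/videos/some-slug") -> True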
def getSlug(self, url: str):
u = url.lower().rstrip("/")
slug = u.split("/")[-1]
return slug
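        # Example: getSlug("https://www.vixen.com/videos/some-scene") -> "some-scene"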
def getScene(self, url: str):
log.debug(f"Scraping using {self.name} graphql API")
q = {
'query': self.getVideoQuery,
'operationName': "getVideo",
'variables': {
"site": self.id,
"videoSlug": self.getSlug(url)
}
}
r = self.callGraphQL(query=q, referer=url)
return self.parse_scene(r)
def getSearchResult(self, query: str):
log.debug(f"Searching using {self.name} graphql API")
q = {
'query': self.getSearchQuery,
'operationName': "getSearchResults",
'variables': {
"site": self.id,
"query": query,
"first": self.search_count
}
}
r = self.callGraphQL(query=q, referer=self.home)
return self.parse_search(r)
def callGraphQL(self, query: dict, referer: str):
headers = {
"Accept-Encoding": "gzip, deflate",
"Content-Type": "application/json",
"Accept": "application/json",
"Referer": referer,
"DNT": "1",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0",
}
if not query:
return None
reattempts = 0
while True:
try:
response = requests.post(self.api, json=query, headers=headers)
if response.status_code == 200:
result = response.json()
if result.get("error"):
for error in result["error"]["errors"]:
raise Exception(f"GraphQL error: {error}")
if reattempts > 0:
log.debug(f"Successful query after attempt #{reattempts}")
return result
                elif response.status_code == 403:
                    log.error("GraphQL query received a 403 status response")
                    reattempts += 1
                    if reattempts < MAX_403_REATTEMPTS:
                        log.debug(f"403 Reattempt {reattempts}/{MAX_403_REATTEMPTS}")
                    else:
                        log.error("Reached max 403 errors for GraphQL query")
                        return {}
else:
raise ConnectionError(
f"GraphQL query failed:{response.status_code} - {response.content}"
)
except Exception as err:
log.error(f"GraphqQL query failed {err}")
return None
def parse_scene(self, response):
scene = {}
if response is None or response.get('data') is None:
return scene
data = response['data'].get('findOneVideo')
if data:
scene['title'] = data.get('title')
scene['details'] = data.get('description')
scene['studio'] = {"name": self.name}
scene['code'] = data.get('videoId')
director = data.get("directors")
if director is not None:
scene["director"] = ", ".join(d["name"] for d in data.get("directors", []))
date = data.get('releaseDate')
if date:
scene['date'] = date.split("T")[0]
scene['performers'] = []
if data.get('models'):
for model in data['models']:
scene['performers'].append({"name": model['name']})
scene['tags'] = []
tags = data.get('tags')
categories = data.get('categories')
if tags == [] and categories:
for tag in data['categories']:
scene['tags'].append({"name": tag['name']})
elif tags:
for tag in data['tags']:
scene['tags'].append({"name": tag})
if data.get('images'):
if data['images'].get('poster'):
maxWidth = 0
for image in data['images']['poster']:
if image['width'] > maxWidth:
scene['image'] = image['src']
maxWidth = image['width']
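            # `url` here is the module-level URL taken from the input fragment below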
if url:
scene["url"] = url
scene['runLength'] = parse_duration_to_seconds(data.get("runLength"))
markers = data.get('chapters', {}).get('video')
if markers:
scene["markers"] = markers
return scene
        # return the (possibly empty) dict so callers can safely call .get()/.pop()
        return scene
def parse_search(self, response):
search_result = []
if response is None or response.get('data') is None:
return search_result
data = response['data'].get('searchVideos')
if data:
for scene in data["edges"]:
scene = scene.get("node")
if scene:
slug = scene.get('slug')
# search results without a url are useless
# only add results with a slug present
if slug:
sc = {}
sc['title'] = scene.get('title')
sc['details'] = scene.get('description')
sc['url'] = f"https://www.{self.id.lower()}.com/videos/{slug}"
sc['code'] = scene.get('videoId')
sc['studio'] = {"name": self.name}
date = scene.get('releaseDate')
if date:
sc['date'] = date.split("T")[0]
sc['performers'] = []
if scene.get('modelsSlugged'):
for model in scene['modelsSlugged']:
sc['performers'].append(
{"name": model['name']})
if scene.get('images'):
if scene['images'].get('listing'):
maxWidth = 0
for image in scene['images']['listing']:
if image['width'] > maxWidth:
sc['image'] = image['src']
maxWidth = image['width']
search_result.append(sc)
return search_result
        return search_result
@property
def length(self):
return len(self.id)
getVideoQuery = """
query getVideo($videoSlug: String, $site: Site) {
findOneVideo(input: {slug: $videoSlug, site: $site}) {
title
description
releaseDate
models {
name
}
videoId
directors {
name
}
images {
poster {
src
width
}
}
tags
categories {
name
}
runLength
chapters {
video {
title
seconds
}
}
}
}
"""
getSearchQuery = """
query getSearchResults($query: String!, $site: Site!, $first: Int) {
searchVideos(input: { query: $query, site: $site, first: $first }) {
edges {
node {
description
title
slug
releaseDate
modelsSlugged: models {
name
slugged: slug
}
videoId
images {
listing {
src
width
}
}
}
}
}
}
"""
studios = {
Site('Blacked Raw'),
Site('Blacked'),
Site('Deeper'),
Site('Milfy'),
Site('Tushy'),
Site('Tushy Raw'),
Site('Slayed'),
Site('Vixen')
}
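# Stash pipes a JSON scrape fragment to stdin; depending on the scrape action
# it can contain keys such as "name", "url", "urls" and "id".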
frag = json.loads(sys.stdin.read())
search_query = frag.get("name")
url = frag.get("url")
scene_id = frag.get("id")
def check_alternate_urls(site):
for u in frag.get("urls", []):
if site.isValidURL(u):
return u
return None
#sceneByURL
if url:
for x in studios:
proper_url = None
if x.isValidURL(url):
proper_url = url
else:
proper_url = check_alternate_urls(site=x)
        if proper_url is not None:
s = x.getScene(proper_url)
# log.info(f"{json.dumps(s)}")
process_chapters(scene_id=scene_id, api_json=s)
# drop unwanted keys from json result
s.pop('runLength', None)
s.pop('markers', None)
print(json.dumps(s))
sys.exit(0)
log.error(f"URL: {url} is not supported")
print("{}")
sys.exit(1)
#sceneByName
if search_query and "search" in sys.argv:
search_query = search_query.lower()
lst = []
wanted = []
# Only search on specific site if the studio name is in the search query
# ('Ariana Vixen Cecilia' will search only on Vixen)
# if the first character is $, filter will be ignored.
if search_query[0] != "$":
# make sure longer matches are filtered first
studios_sorted = sorted(studios, reverse=True, key=lambda s: s.length)
        for x in studios_sorted:
            if x.id.lower() in search_query:
                wanted.append(x.id.lower())
                # remove the studio name from the search query
                search_query = search_query.replace(x.id.lower(), "")
else:
search_query = search_query[1:]
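    # Example: "ariana vixen cecilia" yields wanted == ["vixen"] and the query
    # "ariana  cecilia"; "$ariana vixen cecilia" searches every site unfiltered.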
if wanted:
log.info(f"Filter: {wanted} applied")
log.debug(f"Query: '{search_query}'")
for x in studios:
if wanted:
if x.id.lower() not in wanted:
log.debug(f"[Filter] ignoring {x.id}")
continue
s = x.getSearchResult(search_query)
# merge all list into one
if s:
lst.extend(s)
#log.debug(f"{json.dumps(lst)}")
print(json.dumps(lst))
sys.exit(0)


@@ -0,0 +1,41 @@
name: "Vixen Media Group"
# requires: py_common
sceneByURL:
- url:
- blacked.com/videos
- blackedraw.com/videos
- deeper.com/videos
- milfy.com/videos
- slayed.com/videos
- tushy.com/videos
- tushyraw.com/videos
- vixen.com/videos
action: script
script:
- python
- vixenNetwork.py
# If your search query contains the studio name, the scraper will only search on that site.
# 'Ariana Vixen Cecilia' will only search on Vixen for 'Ariana Cecilia'
# If your search query starts with a '$' all sites will be searched
# '$Ariana Vixen Cecilia' will search in all sites for 'Ariana Vixen Cecilia'
sceneByName:
action: script
script:
- python
- vixenNetwork.py
- search
sceneByQueryFragment:
action: script
script:
- python
- vixenNetwork.py
sceneByFragment:
action: script
script:
- python
- vixenNetwork.py
# Last Updated December 30, 2023