193 lines
6.4 KiB
Python
193 lines
6.4 KiB
Python
import json
|
|
import sys
|
|
import re
|
|
from urllib.parse import urlparse
|
|
import requests
|
|
from datetime import datetime
|
|
|
|
# One HTTP session is shared by every request this scraper makes
session = requests.session()

# The logging helper comes from the shared 'py_common' folder; without it the
# scraper cannot run, so tell the user where to get it and stop.
try:
    import py_common.log as log
except ModuleNotFoundError:
    missing_py_common_hint = "You need to download the folder 'py_common' from the community repo (CommunityScrapers/tree/master/scrapers/py_common)"
    print(missing_py_common_hint, file=sys.stderr)
    sys.exit(1)
|
|
|
|
|
|
# --------------------------------------------
|
|
# This is a scraper for: RealityLovers sites
|
|
#
|
|
|
|
|
|
def performerByURL():
    """Scrape a performer from a performer page URL.

    Reads a JSON object containing a ``url`` key from stdin, extracts the
    numeric actor ID from the URL path and requests the actor from the
    ``engine.<domain>`` content API.

    Returns:
        dict: ``name`` and ``gender`` plus every optional field the API
        supplied (``image``, ``birthdate``, ``country``, ``measurements``,
        ``height``, ``weight``, ``details``, ``tags``, ``twitter``,
        ``instagram``). An empty dict is returned when no actor ID can be
        found in the URL or the API answers with an HTTP error status.
    """
    # read the input. A URL must be passed in for the performerByURL call
    inp = json.loads(sys.stdin.read())
    url = inp["url"]

    # Use the last all-digit path segment that is followed by a slash.
    # (re.sub would hand back the whole URL when nothing matches, which made
    # the "no ID" check unreliable.)
    id_candidates = re.findall(r"/(\d+)(?=/)", url)
    if not id_candidates:
        log.error("No actor ID found in URL")
        return {}
    actor_id = id_candidates[-1]

    domain = urlparse(url).netloc.replace("www.", "")
    api_url = f"https://engine.{domain}/content/actor?actorId={actor_id}"

    # Making some assumptions here
    gender = "TRANSGENDER_FEMALE" if "tsvirtuallovers" in domain else "FEMALE"

    scraped = session.get(api_url)
    # Same error handling as sceneByURL: log and return an empty result
    if scraped.status_code >= 400:
        log.error("HTTP Error: %s" % scraped.status_code)
        return {}
    log.trace("Scraped the url: " + api_url)

    data = scraped.json()

    performer = {"name": data["name"]}

    # The image is optional: only parse the srcset when a screenshot exists
    screenshots = data.get("screenshots") or []
    if screenshots and (src_set := screenshots[0].get("galleryImgSrcSet")):
        # keep the URL that follows the last comma of the srcset
        performer["image"] = re.sub(r".*,(\S+).*", r"\1", src_set)

    performer["gender"] = gender

    # output key -> API key; a field is copied only when the API value is truthy
    optional_fields = {
        "birthdate": "birthDay",
        "country": "country",
        "measurements": "cupSize",
        "height": "height",
        "weight": "weight",
        "details": "description",
        "twitter": "twitterLink",
        "instagram": "instagramLink",
    }
    for output_key, api_key in optional_fields.items():
        if value := data.get(api_key):
            performer[output_key] = value

    # "categories" may be absent or null
    if tags := [{"name": x["name"]} for x in data.get("categories") or []]:
        performer["tags"] = tags

    return performer
|
|
|
|
|
|
def sceneByURL():
    """Scrape a scene from a scene page URL.

    Reads a JSON object containing a ``url`` key from stdin, extracts the
    numeric scene ID from the URL path and requests the scene from the
    ``engine.<domain>`` content API.

    Returns:
        dict: ``title``, ``date``, ``tags``, ``details``, ``studio`` and
        ``performers``, plus ``image`` when the API supplied a usable image
        URL. An empty dict is returned when no scene ID can be found in the
        URL or the API answers with an HTTP error status.
    """
    # read the input. A URL must be passed in for the sceneByURL call
    inp = json.loads(sys.stdin.read())
    url = inp["url"]

    # Use the last all-digit path segment that is followed by a slash.
    # (re.sub would hand back the whole URL when nothing matches, which made
    # the "no ID" check unreliable.)
    id_candidates = re.findall(r"/(\d+)(?=/)", url)
    if not id_candidates:
        log.error("No scene ID found in URL")
        return {}
    scene_id = id_candidates[-1]

    domain = urlparse(url).netloc.replace("www.", "")
    studio = "TS Virtual Lovers" if "tsvirtuallovers" in domain else "Reality Lovers"

    api_url = f"https://engine.{domain}/content/videoDetail?contentId={scene_id}"
    scraped = session.get(api_url)
    if scraped.status_code >= 400:
        log.error("HTTP Error: %s" % scraped.status_code)
        return {}
    log.trace("Scraped the url: " + api_url)

    data = scraped.json()

    title = re.sub(r'\s+VR Porn Video$', '', data["title"])
    details = data["description"]
    date = data["releaseDate"]

    # Only the first main image is used; tolerate it being absent
    main_images = data.get("mainImages") or []
    main_image = main_images[0] if main_images else {}

    # image: every srcset candidate is "<url> <descriptor>". split() without
    # an argument ignores whitespace after the separating commas, which would
    # otherwise produce empty URLs.
    image_urls = [
        candidate.split()[0]
        for candidate in (main_image.get("imgSrcSet") or "").split(",")
        if candidate.strip()
    ]
    # pick highest available quality, or fall back to first image
    suffixes = ('ultra.jpg', 'big.jpg', 'high.jpg', 'medium.jpg', 'small.jpg')
    image_url = next(
        (
            candidate
            for suffix in suffixes
            for candidate in image_urls
            if candidate.endswith(suffix)
        ),
        image_urls[0] if image_urls else None,
    )

    # tags
    tags = [{"name": x["name"]} for x in data.get("categories") or []]
    tags.append({'name': 'Virtual Reality'})
    if main_image.get("perspective") == 'VOYEUR':
        tags.extend([{'name': 'Non-POV'}, {'name': 'Voyeur'}])

    actors = [
        {"name": x["name"], "url": f"https://{domain}/{x['uri']}"}
        for x in data.get("starring") or []
    ]

    # create our output
    scene = {
        "title": title,
        "date": date,
        "tags": tags,
        "details": details,
        "studio": {"name": studio},
        "performers": actors,
    }
    if image_url:
        scene["image"] = image_url
    return scene
|
|
|
|
|
|
# Get the scene by the fragment. The title is used as the search field. Should return the JSON response.
|
|
def sceneByName():
    """Search both supported sites for scenes matching a title or name.

    Reads a JSON object from stdin and uses its ``title`` value (or ``name``
    when no title is given) as the search text for the
    ``engine.<domain>/content/search`` endpoint of each site.

    Returns:
        list: one dict per found scene with the keys ``Title``, ``URL`` and
        ``Image``, plus ``Date`` (``YYYY-MM-DD``) when the release date could
        be parsed. An empty list is returned when no search text was given.
    """
    # read the input. A title or name must be passed in
    inp = json.loads(sys.stdin.read())
    log.trace("Input: " + json.dumps(inp))
    query_value = inp.get("title") or inp.get("name")
    if not query_value:
        log.error("No title or name Entered")
        return []
    log.trace("Query Value: " + query_value)

    # No way to know if the user wanted to search realitylovers or tsvirtuallovers, so search both
    raw_scenes = []
    for domain in ("realitylovers.com", "tsvirtuallovers.com"):
        try:
            # Pass the query through `params` so characters such as '&' or
            # '#' in the search text are URL-encoded instead of corrupting
            # the query string.
            scraped_scenes = session.get(
                f"https://engine.{domain}/content/search",
                params={
                    "max": 100000,
                    "page": 0,
                    "pornstar": 0,
                    "category": 0,
                    "s": query_value,
                },
            )
            scraped_scenes.raise_for_status()
        except requests.RequestException as err:
            # A failure on one site should not discard results from the other
            log.error(f"Search on {domain} failed: {err}")
            continue
        scenes = scraped_scenes.json()
        new_scenes = [{"domain": domain, **s} for s in scenes.get("contents") or []]
        log.debug(f"Found {len(new_scenes)} scenes from {domain}")
        raw_scenes.extend(new_scenes)

    results = []
    for scene in raw_scenes:
        result = {
            "Title": scene["title"],
            "URL": f"https://{scene['domain']}/{scene['videoUri']}",
            # keep the URL listed between "1x," and " 2x" in the srcset
            "Image": re.sub(r".*1x,(.*) 2x", r"\1", scene["mainImageSrcset"]),
        }
        # Parse the date published, ex: "Sep 27th 2018". Only strip the
        # ordinal suffix when it directly follows a digit, so letters inside
        # the month name are never touched.
        cleandate = re.sub(r"(?<=\d)(st|nd|rd|th)", "", scene["released"])
        try:
            result["Date"] = datetime.strptime(cleandate, "%b %d %Y").strftime("%Y-%m-%d")
        except ValueError:
            # Keep the scene; just leave the date out
            log.debug(f"Could not parse release date: {scene['released']}")
        # Add the new scene to the results
        results.append(result)

    return results
|
|
|
|
|
|
# Figure out what was invoked by Stash and call the correct thing.
# The scrape action is expected as the first command line argument.
if len(sys.argv) < 2:
    # Without this guard a missing argument ends in a bare IndexError
    log.error("No argument passed")
    sys.exit(1)

action = sys.argv[1]
if action == "performerByURL":
    print(json.dumps(performerByURL()))
elif action in ("sceneByURL", "sceneByQueryFragment"):
    print(json.dumps(sceneByURL()))
elif action == "sceneByName":
    scenes = sceneByName()
    print(json.dumps(scenes))
elif action == "sceneByFragment":
    scenes = sceneByName()
    if scenes:
        # return the first query result
        print(json.dumps(scenes[0]))
    else:
        # empty object for no results
        log.info("No results")
        print("{}")
else:
    log.error("Unknown argument passed: " + action)
    print(json.dumps({}))
|