compose-projects-arr/stash/config/scrapers/community/RealityLovers/RealityLovers.py

import json
import sys
import re
from urllib.parse import urlparse
import requests
from datetime import datetime

# initialize the session for making requests
session = requests.session()

try:
    import py_common.log as log
except ModuleNotFoundError:
    print(
        "You need to download the folder 'py_common' from the community repo (CommunityScrapers/tree/master/scrapers/py_common)",
        file=sys.stderr,
    )
    sys.exit(1)


#  --------------------------------------------
# This is a scraper for: RealityLovers sites
#


def performerByURL():
    # read the input.  A URL must be passed in for the sceneByURL call
    inp = json.loads(sys.stdin.read())
    actor_id = re.sub(r".*/([0-9]*)/.*", r"\1", inp["url"])
    if not actor_id:
        log.error("No actor ID found in URL")
        return {}

    domain = urlparse(inp["url"]).netloc.replace("www.", "")
    api_url = f"https://engine.{domain}/content/actor?actorId={actor_id}"

    # Making some assumptions here
    gender = "TRANSGENDER_FEMALE" if "tsvirtuallovers" in domain else "FEMALE"

    scraped = session.get(api_url)
    scraped.raise_for_status()
    log.trace("Scraped the url: " + api_url)

    data = scraped.json()

    performer = {
        "name": data["name"],
        "image": re.sub(
            r".*,(\S+).*", r"\1", data["screenshots"][0]["galleryImgSrcSet"]
        ),
        "gender": gender,
    }
    if birthdate := data.get("birthDay"):
        performer["birthdate"] = birthdate
    if country := data.get("country"):
        performer["country"] = country
    if measurements := data.get("cupSize"):
        performer["measurements"] = measurements
    if height := data.get("height"):
        performer["height"] = height
    if weight := data.get("weight"):
        performer["weight"] = weight
    if details := data.get("description"):
        performer["details"] = details
    if tags := [{"name": x["name"]} for x in data.get("categories", [])]:
        performer["tags"] = tags
    if twitter := data.get("twitterLink"):
        performer["twitter"] = twitter
    if instagram := data.get("instagramLink"):
        performer["instagram"] = instagram

    return performer


def sceneByURL():
    # read the input.  A URL must be passed in for the sceneByURL call
    inp = json.loads(sys.stdin.read())
    scene_id = re.sub(r".*/([0-9]*)/.*", r"\1", inp["url"])
    if not scene_id:
        log.error("No scene ID found in URL")
        return {}
    domain = urlparse(inp["url"]).netloc.replace("www.", "")
    studio = "Reality Lovers"
    if "tsvirtuallovers" in domain:
        studio = "TS Virtual Lovers"

    api_url = f"https://engine.{domain}/content/videoDetail?contentId={scene_id}"
    scraped = session.get(api_url)
    if scraped.status_code >= 400:
        log.error("HTTP Error: %s" % scraped.status_code)
        return {}
    log.trace("Scraped the url: " + api_url)

    data = scraped.json()
    # log.debug(json.dumps(data))

    title = re.sub(r'\s+VR Porn Video$', '', data["title"])
    details = data["description"]

    # image
    image_urls = [
        src_set.split(' ')[0] for src_set in data["mainImages"][0]["imgSrcSet"].split(',')
    ]
    # pick highest available quality, or fall back to first image
    suffixes = ['ultra.jpg', 'big.jpg', 'high.jpg', 'medium.jpg', 'small.jpg']
    image_url = next((url for suffix in suffixes for url in image_urls if url.endswith(suffix)), image_urls[0])
    date = data["releaseDate"]

    # tags
    tags = [{"name": x["name"]} for x in data["categories"]]
    tags.append({'name': 'Virtual Reality'})
    if data["mainImages"][0]["perspective"] == 'VOYEUR':
        tags.extend([{'name': 'Non-POV'}, {'name': 'Voyeur'}])

    actors = [
        {"name": x["name"], "url": f"https://{domain}/{x['uri']}"}
        for x in data["starring"]
    ]

    # create our output
    return {
        "title": title,
        "date": date,
        "tags": tags,
        "details": details,
        "image": image_url,
        "studio": {"name": studio},
        "performers": actors,
    }


# Get the scene by the fragment.  The title is used as the search field.  Should return the JSON response.
def sceneByName():
    # read the input.  A title or name must be passed in
    inp = json.loads(sys.stdin.read())
    log.trace("Input: " + json.dumps(inp))
    query_value = inp["title"] if "title" in inp else inp["name"]
    if not query_value:
        log.error("No title or name Entered")
        return []
    log.trace("Query Value: " + query_value)

    # No way to know if the user wanted to search realitylovers or tsvirtuallovers, so search both
    raw_scenes = []
    for domain in ("realitylovers.com", "tsvirtuallovers.com"):
        api_url = f"https://engine.{domain}/content/search?max=100000&page=0&pornstar=0&category=0&s={query_value}"
        scraped_scenes = session.get(api_url)
        scraped_scenes.raise_for_status()
        scenes = scraped_scenes.json()
        new_scenes = [{"domain": domain, **s} for s in scenes["contents"]]
        log.debug(f"Found {len(new_scenes)} scenes from {domain}")
        raw_scenes.extend(new_scenes)

    results = []
    for scene in raw_scenes:
        # Parse the date published.  Get rid of the 'st' (like in 1st) via a regex. ex: "Sep 27th 2018"
        cleandate = re.sub(r"(st|nd|rd|th)", r"", scene["released"])
        date = datetime.strptime(cleandate, "%b %d %Y").strftime("%Y-%m-%d")
        main_image_src = re.sub(r".*1x,(.*) 2x", r"\1", scene["mainImageSrcset"])
        # Add the new scene to the results
        results.append(
            {
                "Title": scene["title"],
                "URL": f"https://{scene['domain']}/{scene['videoUri']}",
                "Image": main_image_src,
                "Date": date,
            }
        )

    return results


# Figure out what was invoked by Stash and call the correct thing
if sys.argv[1] == "performerByURL":
    print(json.dumps(performerByURL()))
elif sys.argv[1] in ("sceneByURL", "sceneByQueryFragment"):
    print(json.dumps(sceneByURL()))
elif sys.argv[1] == "sceneByName":
    scenes = sceneByName()
    print(json.dumps(scenes))
elif sys.argv[1] == "sceneByFragment":
    scenes = sceneByName()
    if len(scenes) > 0:
        # return the first query result
        print(json.dumps(scenes[0]))
    else:
        # empty array for no results
        log.info("No results")
        print("{}")
else:
    log.error("Unknown argument passed: " + sys.argv[1])
    print(json.dumps({}))