stash
This commit is contained in:
296
stash/config/scrapers/community/Jellyfin/Jellyfin.py
Normal file
296
stash/config/scrapers/community/Jellyfin/Jellyfin.py
Normal file
@@ -0,0 +1,296 @@
|
||||
import re
import requests
import sys
import json
from urllib.parse import urlparse, urlencode, quote
from datetime import timedelta

import py_common.log as log
from py_common.config import get_config
from py_common.util import scraper_args
from py_common.types import ScrapedPerformer, ScrapedScene, ScrapedMovie


# DO NOT EDIT THIS FILE
# run the scraper once and edit the config.ini file instead
config = get_config(
    default="""# Jellyfin endpoint, change this if Jellyfin is not running on the same server as Stash
host = http://localhost:8096/

# Jellyfin API Key: generate one in Jellyfin Dashboard -> Advanced -> API Keys
api_key = xxxxxxxxxxxxxxxxxxxxxxxx

# Jellyfin user-id (extract from Jellyfin->Admin->User from the URL of the User)
# for example: http://localhost:8096/web/index.html#!/myprofile.html?userId=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
userid = xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
"""
)

# Collect the names of settings the user has not replaced yet; the defaults
# above are all-"x" placeholders, so any remaining "x" means "not configured".
errors = []

if "x" in config.api_key:
    errors.append("api_key")

if "x" in config.userid:
    errors.append("userid")

if errors:
    log.error(f"Please configure {' and '.join(errors)} in your config.ini")
    exit(1)

# Query parameters attached to every Jellyfin API request.
base_params = {"api_key": config.api_key, "userid": config.userid}
# Parsed host URL with auth params pre-applied; individual requests swap in
# their own path via ._replace(path=...).
base_url = urlparse(config.host)._replace(query=urlencode(base_params))
|
||||
|
||||
|
||||
def to_scraped_performer(item: dict) -> ScrapedPerformer:
    """Map a Jellyfin Person item onto Stash's ScrapedPerformer shape."""
    name = item["Name"]
    performer: ScrapedPerformer = {
        "name": name,
        "url": base_url._replace(path=f"Persons/{quote(name)}").geturl(),
        "images": [
            base_url._replace(path=f"Items/{item['Id']}/Images/Primary").geturl()
        ],
    }

    birthdate = item.get("PremiereDate")
    if birthdate:
        # Keep only the YYYY-MM-DD prefix of the ISO timestamp
        performer["birthdate"] = birthdate[:10]

    aliases = item.get("OriginalTitle")
    if aliases:
        performer["aliases"] = aliases

    locations = item.get("ProductionLocations")
    if locations:
        # "City, Country" -> "Country"
        performer["country"] = locations[0].split(",")[-1].strip()

    tag_names = item.get("Tags")
    if tag_names:
        performer["tags"] = [{"name": tag_name} for tag_name in tag_names]

    return performer
|
||||
|
||||
|
||||
def to_scraped_scene(item: dict) -> ScrapedScene:
    """Map a Jellyfin Movie item onto Stash's ScrapedScene shape."""
    item_id = item["Id"]
    scene: ScrapedScene = {
        "title": item["Name"],
        "url": base_url._replace(path=f"Items/{item_id}").geturl(),
        "image": base_url._replace(path=f"Items/{item_id}/Images/Primary").geturl(),
    }

    premiere = item.get("PremiereDate")
    if premiere:
        # Keep only the YYYY-MM-DD prefix of the ISO timestamp
        scene["date"] = premiere[:10]

    overview = item.get("Overview")
    if overview:
        scene["details"] = overview

    genres = item.get("Genres")
    if genres:
        scene["tags"] = [{"name": genre} for genre in genres]

    studios = item.get("Studios")
    if studios:
        scene["studio"] = {"name": studios[0]["Name"]}

    people = item.get("People")
    if people:
        scene["performers"] = [
            {"name": person["Name"]} for person in people if person["Type"] == "Actor"
        ]

    # Prefer an external IMDb/TMDb link over the raw Jellyfin item URL
    external_urls = item.get("ExternalUrls")
    if external_urls:
        for link in external_urls:
            if link["Name"] in ("IMDb", "TheMovieDb"):
                scene["url"] = link["Url"]

    return scene
|
||||
|
||||
|
||||
def to_scraped_movie(item: dict) -> ScrapedMovie:
    """Map a Jellyfin Movie item onto Stash's ScrapedMovie shape."""
    movie: ScrapedMovie = {
        "name": item["Name"],
        "front_image": base_url._replace(
            path=f"Items/{item['Id']}/Images/Primary"
        ).geturl(),
    }

    ticks = item.get("RunTimeTicks")
    if ticks:
        # Jellyfin reports runtime in 100ns ticks: 10,000,000 ticks per second
        movie["duration"] = str(timedelta(seconds=int(ticks / 10000000)))

    premiere = item.get("PremiereDate")
    if premiere:
        movie["date"] = premiere[:10]
    else:
        year = item.get("ProductionYear")
        if year:
            movie["date"] = str(year)

    overview = item.get("Overview")
    if overview:
        movie["synopsis"] = overview

    # First credited director, if any
    for person in item.get("People") or []:
        if person["Type"] == "Director":
            movie["director"] = person["Name"]
            break

    studios = item.get("Studios")
    if studios:
        movie["studio"] = {
            "name": studios[0]["Name"],
        }

    # Critic rating takes precedence over the community rating
    rating = item.get("CriticRating") or item.get("CommunityRating")
    if rating:
        movie["rating"] = str(rating)

    return movie
|
||||
|
||||
|
||||
def scene_from_url(url: str) -> ScrapedScene | None:
    """Scrape a single scene from a Jellyfin URL.

    Accepts either a Jellyfin web UI URL (the item ID is extracted from its
    id=... query parameter) or a direct API URL.  Returns None when the item
    cannot be found in the Jellyfin library.
    """
    # If users paste their Jellyfin URL we can extract the ID from it;
    # the negative lookbehind avoids matching the userId= parameter
    if match := re.search(r"(?<!user)id=([a-f0-9]+)", url):
        movie_id = match.group(1)
        log.debug(f"Converting from web UI URL using ID {movie_id}")
        url = base_url._replace(path=f"Users/{config.userid}/Items/{movie_id}").geturl()
    else:
        # Direct API URL: re-attach the api_key/userid query parameters
        url = urlparse(url)._replace(query=urlencode(base_params)).geturl()

    log.debug(f"Getting scene through URL '{url}'")
    res = requests.get(url)

    scene = res.json()
    # Use .get so an error payload without a "Name" key does not raise
    # KeyError (consistent with movie_from_url)
    if not scene.get("Name"):
        log.warning(
            "Failed to scrape from URL: make sure this scene is in your library"
        )
        return None

    log.debug(f"Found scene '{scene['Name']}'")

    return to_scraped_scene(scene)
|
||||
|
||||
|
||||
def scene_search(title: str) -> list[ScrapedScene]:
    """Search the Jellyfin library for movies matching the given title."""
    # Strip the extension if we get a file name
    scenename = title.rsplit(".", 1)[0] if "." in title else title

    search_params = {
        "searchTerm": scenename,
        "Limit": 10,
        "IncludeItemTypes": "Movie",
        "Fields": "Overview,Genres,Studios,People,ExternalUrls,PremiereDate",
        "Recursive": True,
        **base_params,
    }

    search_url = base_url._replace(
        path="Items", query=urlencode(search_params)
    ).geturl()
    log.debug(f"Querying URL: {search_url}")

    items = requests.get(search_url).json()["Items"]

    # In order for scene-by-query-fragment to scrape these scenes, we need to
    # add their Jellyfin URLs to the scene object: users can rescrape to get a TMDb URL
    scrapable_scenes = [
        to_scraped_scene(item)
        | {
            "url": base_url._replace(
                path=f"Users/{config.userid}/Items/{item['Id']}"
            ).geturl()
        }
        for item in items
    ]

    log.debug(f"Found {len(scrapable_scenes)} scenes")
    return scrapable_scenes
|
||||
|
||||
|
||||
def scene_from_fragment(title: str) -> ScrapedScene | None:
    """Resolve a scene fragment by searching Jellyfin for its title."""
    log.debug(f"Getting scene through fragment title: '{title}'")

    matches = scene_search(title)

    if not matches:
        log.debug(f"Didn't find {title} in fragment mode")
        return None

    # Take the first (best) search hit
    best = matches[0]
    log.debug(f"Found '{best['title']}'")  # type: ignore (title will be set)
    return best
|
||||
|
||||
|
||||
def performer_from_url(url: str) -> ScrapedPerformer | None:
    """Scrape a single performer from a Jellyfin Persons URL."""
    log.debug(f"Getting performer from URL: '{url}'")

    # Re-attach the api_key/userid query parameters before fetching
    request_url = urlparse(url)._replace(query=urlencode(base_params)).geturl()
    person = requests.get(request_url).json()

    if "Name" not in person:
        log.warning(f"Didn't find {url} in URL mode")
        return None

    log.debug(f"Found performer '{person['Name']}'")
    return to_scraped_performer(person)
|
||||
|
||||
|
||||
def performer_search(query: str) -> list[ScrapedPerformer]:
    """Search Jellyfin's Persons endpoint for performers matching *query*."""
    log.debug(f"Searching for performer '{query}'")
    search_params = {
        "searchTerm": query,
        "fields": "OriginalTitle,ProductionLocations,PremiereDate,Tags,ExternalUrls",
        **base_params,
    }

    get_url = base_url._replace(path="Persons", query=urlencode(search_params))
    # Log the actual URL string; formatting the ParseResult directly would
    # print its namedtuple repr instead of the request URL
    log.debug(f"Querying URL: {get_url.geturl()}")
    res = requests.get(get_url.geturl())

    search_result = res.json()
    performers = [to_scraped_performer(item) for item in search_result["Items"]]

    log.debug(f"Found {len(performers)} performers")
    return performers
|
||||
|
||||
|
||||
def movie_from_url(url: str) -> ScrapedMovie | None:
    """Scrape a single movie from a Jellyfin URL.

    Accepts either a Jellyfin web UI URL (the item ID is extracted from its
    id=... query parameter) or a direct API URL.  Returns None when the item
    cannot be found in the Jellyfin library.
    """
    # If users paste their Jellyfin URL we can extract the ID from it;
    # the negative lookbehind avoids matching the userId= parameter
    if match := re.search(r"(?<!user)id=([a-f0-9]+)", url):
        movie_id = match.group(1)
        log.debug(f"Converting from web UI URL using ID {movie_id}")
        url = base_url._replace(path=f"Users/{config.userid}/Items/{movie_id}").geturl()
    else:
        # Direct API URL: re-attach the api_key/userid query parameters,
        # matching scene_from_url — previously the URL was used as-is and
        # the request failed authentication
        url = urlparse(url)._replace(query=urlencode(base_params)).geturl()

    log.debug(f"Getting movie through URL '{url}'")
    res = requests.get(url)
    result = res.json()

    if not result.get("Name"):
        log.warning(
            "Failed to scrape from URL: make sure this movie is in your library"
        )
        return None

    return to_scraped_movie(result)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # op: the scraper operation Stash invoked us with; args: its JSON input
    op, args = scraper_args()

    result = None

    # Dispatch on (operation, input fragment) pairs
    match op, args:
        case "scene-by-url" | "scene-by-query-fragment", {"url": url}:
            result = scene_from_url(url)
        case "scene-by-name", {"name": name} if name:
            result = scene_search(name)
        case "scene-by-fragment", {"title": title}:
            result = scene_from_fragment(title)
        case "performer-by-url" | "performer-by-fragment", {"url": url}:
            result = performer_from_url(url)
        case "performer-by-name", {"name": name}:
            result = performer_search(name)
        case "movie-by-url", {"url": url}:
            result = movie_from_url(url)
        case _:
            # Unsupported operation or missing/empty arguments
            log.error(f"Operation: {op}, arguments: {json.dumps(args)}")
            sys.exit(1)

    # Stash reads the scrape result as JSON from stdout
    print(json.dumps(result))
|
||||
Reference in New Issue
Block a user