This commit is contained in:
Christoph Califice
2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions

View File

@@ -0,0 +1,296 @@
import re
import requests
import sys
import json
from urllib.parse import urlparse, urlencode, quote
from datetime import timedelta
import py_common.log as log
from py_common.config import get_config
from py_common.util import scraper_args
from py_common.types import ScrapedPerformer, ScrapedScene, ScrapedMovie
# DO NOT EDIT THIS FILE
# run the scraper once and edit the config.ini file instead
config = get_config(
    default="""# Jellyfin endpoint, change this if Jellyfin is not running on the same server as Stash
host = http://localhost:8096/
# Jellyfin API Key: generate one in Jellyfin Dashboard -> Advanced -> API Keys
api_key = xxxxxxxxxxxxxxxxxxxxxxxx
# Jellyfin user-id (extract from Jellyfin->Admin->User from the URL of the User)
# for example: http://localhost:8096/web/index.html#!/myprofile.html?userId=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
userid = xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
"""
)

# The shipped defaults are runs of "x", so any value that still contains an
# "x" is reported as not configured yet.
errors = [key for key in ("api_key", "userid") if "x" in getattr(config, key)]
if errors:
    log.error(f"Please configure {' and '.join(errors)} in your config.ini")
    # sys.exit() rather than the site-provided exit(): the latter is intended
    # for interactive sessions and is missing when Python runs without `site`.
    sys.exit(1)

# Credentials travel as query parameters on every request built from base_url.
base_params = {"api_key": config.api_key, "userid": config.userid}
# Parsed host with the credentials attached; callers swap in a path (and
# sometimes a richer query) through ._replace() before calling .geturl().
base_url = urlparse(config.host)._replace(query=urlencode(base_params))
def to_scraped_performer(item: dict) -> ScrapedPerformer:
    """Translate a Jellyfin person item into a scraped performer.

    ``Name`` and ``Id`` must be present in ``item``; every other field is
    copied over only when the item provides a truthy value for it.
    """
    name = item["Name"]
    profile_url = base_url._replace(path=f"Persons/{quote(name)}").geturl()
    portrait_url = base_url._replace(
        path=f"Items/{item['Id']}/Images/Primary"
    ).geturl()
    performer: ScrapedPerformer = {
        "name": name,
        "url": profile_url,
        "images": [portrait_url],
    }

    premiere = item.get("PremiereDate")
    if premiere:
        # First ten characters only (the YYYY-MM-DD part, assuming an
        # ISO-style timestamp).
        performer["birthdate"] = premiere[:10]

    original_title = item.get("OriginalTitle")
    if original_title:
        performer["aliases"] = original_title

    locations = item.get("ProductionLocations")
    if locations:
        # Last comma-separated component of the first location.
        *_, last_part = locations[0].split(",")
        performer["country"] = last_part.strip()

    labels = item.get("Tags")
    if labels:
        performer["tags"] = [{"name": label} for label in labels]

    return performer
def to_scraped_scene(item: dict) -> ScrapedScene:
    """Translate a Jellyfin item into a scraped scene.

    ``Name`` and ``Id`` must be present in ``item``; optional fields are
    copied only when truthy. When the item lists an IMDb or TheMovieDb
    external URL, that URL replaces the Jellyfin item URL (the last matching
    entry wins).
    """
    title = item["Name"]
    item_path = f"Items/{item['Id']}"
    scene: ScrapedScene = {
        "title": title,
        "url": base_url._replace(path=item_path).geturl(),
        "image": base_url._replace(path=f"{item_path}/Images/Primary").geturl(),
    }

    premiere = item.get("PremiereDate")
    if premiere:
        scene["date"] = premiere[:10]

    overview = item.get("Overview")
    if overview:
        scene["details"] = overview

    genres = item.get("Genres")
    if genres:
        scene["tags"] = [{"name": genre} for genre in genres]

    studios = item.get("Studios")
    if studios:
        # Only the first studio is used.
        scene["studio"] = {"name": studios[0]["Name"]}

    people = item.get("People")
    if people:
        # Keep actors only; other people types are dropped.
        scene["performers"] = [
            {"name": person["Name"]} for person in people if person["Type"] == "Actor"
        ]

    for link in item.get("ExternalUrls") or ():
        if link["Name"] in ("IMDb", "TheMovieDb"):
            scene["url"] = link["Url"]

    return scene
def to_scraped_movie(item: dict) -> ScrapedMovie:
    """Translate a Jellyfin item into a scraped movie.

    ``Name`` and ``Id`` must be present in ``item``; optional fields are
    copied only when truthy. The date falls back to the production year and
    the rating falls back from the critic rating to the community rating.
    """
    movie: ScrapedMovie = {
        "name": item["Name"],
        "front_image": base_url._replace(
            path=f"Items/{item['Id']}/Images/Primary"
        ).geturl(),
    }

    ticks = item.get("RunTimeTicks")
    if ticks:
        # Ten million ticks are treated as one second; fractions are dropped.
        whole_seconds = int(ticks / 10_000_000)
        movie["duration"] = str(timedelta(seconds=whole_seconds))

    premiere = item.get("PremiereDate")
    if premiere:
        movie["date"] = premiere[:10]
    else:
        year = item.get("ProductionYear")
        if year:
            movie["date"] = str(year)

    overview = item.get("Overview")
    if overview:
        movie["synopsis"] = overview

    # Only the first person typed as a director is recorded.
    people = item.get("People") or []
    first_director = next(
        (person for person in people if person["Type"] == "Director"), None
    )
    if first_director is not None:
        movie["director"] = first_director["Name"]

    studios = item.get("Studios")
    if studios:
        movie["studio"] = {"name": studios[0]["Name"]}

    score = item.get("CriticRating") or item.get("CommunityRating")
    if score:
        movie["rating"] = str(score)

    return movie
def scene_from_url(url: str) -> ScrapedScene | None:
    """Scrape a single scene from a Jellyfin web UI link or API URL.

    Args:
        url: Either a web UI link containing ``id=<hex item id>`` or a direct
            API URL for the item.

    Returns:
        The scraped scene, or ``None`` (after logging a warning) when the
        response cannot be decoded or carries no usable ``Name``.
    """
    # If users paste their Jellyfin URL we can extract the ID from it
    # (the lookbehind keeps a lowercase "userid=" parameter from matching)
    if match := re.search(r"(?<!user)id=([a-f0-9]+)", url):
        movie_id = match.group(1)
        log.debug(f"Converting from web UI URL using ID {movie_id}")
        url = base_url._replace(path=f"Users/{config.userid}/Items/{movie_id}").geturl()
    else:
        # Treated as an API URL: its query string is replaced by our credentials
        url = urlparse(url)._replace(query=urlencode(base_params)).geturl()

    log.debug(f"Getting scene through URL '{url}'")
    res = requests.get(url)
    try:
        scene = res.json()
    except ValueError:
        # An error response is not guaranteed to carry a JSON body; report it
        # through the warning below instead of crashing with a traceback.
        scene = None

    # .get() instead of indexing so a payload without "Name" reaches the
    # warning path, matching how movie_from_url checks its result.
    if not isinstance(scene, dict) or not scene.get("Name"):
        log.warning(
            "Failed to scrape from URL: make sure this scene is in your library"
        )
        return None

    log.debug(f"Found scene '{scene['Name']}'")
    return to_scraped_scene(scene)
def scene_search(title: str) -> list[ScrapedScene]:
    """Search Jellyfin movies by title and return up to ten scraped scenes.

    Everything after the last ``.`` in ``title`` is discarded first, so a
    file name can be passed directly. Each returned scene carries its
    Jellyfin item URL rather than any external URL.
    """
    # Strip the extension if we get a file name
    stem, dot, _ = title.rpartition(".")
    scenename = stem if dot else title

    query = urlencode(
        {
            "searchTerm": scenename,
            "Limit": 10,
            "IncludeItemTypes": "Movie",
            "Fields": "Overview,Genres,Studios,People,ExternalUrls,PremiereDate",
            "Recursive": True,
            **base_params,
        }
    )
    search_url = base_url._replace(path="Items", query=query).geturl()
    log.debug(f"Querying URL: {search_url}")
    items = requests.get(search_url).json()["Items"]

    # In order for scene-by-query-fragment to scrape these scenes, we need to
    # add their Jellyfin URLs to the scene object: users can rescrape to get a TMDb URL
    scrapable_scenes = []
    for item in items:
        scraped = to_scraped_scene(item)
        item_url = base_url._replace(
            path=f"Users/{config.userid}/Items/{item['Id']}"
        ).geturl()
        scrapable_scenes.append(scraped | {"url": item_url})

    log.debug(f"Found {len(scrapable_scenes)} scenes")
    return scrapable_scenes
def scene_from_fragment(title: str) -> ScrapedScene | None:
    """Return the first search hit for ``title``, or ``None`` when there is none."""
    log.debug(f"Getting scene through fragment title: '{title}'")
    candidates = scene_search(title)
    if candidates:
        best = candidates[0]
        log.debug(f"Found '{best['title']}'")  # type: ignore (title will be set)
        return best

    log.debug(f"Didn't find {title} in fragment mode")
    return None
def performer_from_url(url: str) -> ScrapedPerformer | None:
    """Fetch a performer from a Jellyfin person URL.

    The URL's query string is replaced with the configured credentials
    before the request. Returns ``None`` (after a warning) when the response
    has no ``Name`` key.
    """
    log.debug(f"Getting performer from URL: '{url}'")
    authed_url = urlparse(url)._replace(query=urlencode(base_params)).geturl()
    person = requests.get(authed_url).json()

    if "Name" in person:
        log.debug(f"Found performer '{person['Name']}'")
        return to_scraped_performer(person)

    log.warning(f"Didn't find {url} in URL mode")
    return None
def performer_search(query: str) -> list[ScrapedPerformer]:
    """Search Jellyfin persons by name.

    Args:
        query: The search term sent to the ``Persons`` endpoint.

    Returns:
        One scraped performer per entry in the response's ``Items`` list.
    """
    log.debug(f"Searching for performer '{query}'")
    search_params = {
        "searchTerm": query,
        "fields": "OriginalTitle,ProductionLocations,PremiereDate,Tags,ExternalUrls",
        **base_params,
    }
    # Build the final string once so the log shows the actual URL rather than
    # the repr of the parsed-URL tuple.
    search_url = base_url._replace(
        path="Persons", query=urlencode(search_params)
    ).geturl()
    log.debug(f"Querying URL: {search_url}")
    res = requests.get(search_url)
    search_result = res.json()
    performers = [to_scraped_performer(item) for item in search_result["Items"]]
    log.debug(f"Found {len(performers)} performers")
    return performers
def movie_from_url(url: str) -> ScrapedMovie | None:
    """Scrape a single movie from a Jellyfin web UI link or API URL.

    Args:
        url: Either a web UI link containing ``id=<hex item id>`` or a direct
            API URL for the item.

    Returns:
        The scraped movie, or ``None`` (after logging a warning) when the
        response carries no usable ``Name``.
    """
    # If users paste their Jellyfin URL we can extract the ID from it
    if match := re.search(r"(?<!user)id=([a-f0-9]+)", url):
        movie_id = match.group(1)
        log.debug(f"Converting from web UI URL using ID {movie_id}")
        url = base_url._replace(path=f"Users/{config.userid}/Items/{movie_id}").geturl()
    elif not (parsed := urlparse(url)).query:
        # A direct URL without any query string carries no credentials, so
        # attach the configured ones (as scene_from_url does). URLs that
        # already have a query string are left exactly as given.
        url = parsed._replace(query=urlencode(base_params)).geturl()

    log.debug(f"Getting movie through URL '{url}'")
    res = requests.get(url)
    result = res.json()
    if not result.get("Name"):
        log.warning(
            "Failed to scrape from URL: make sure this movie is in your library"
        )
        return None
    return to_scraped_movie(result)
# Script entry point: pick the scraper function for the requested operation
# and print its result as JSON on stdout.
if __name__ == "__main__":
# scraper_args() comes from py_common.util (not shown here); it supplies the
# operation name and the arguments matched below.
op, args = scraper_args()
result = None
# Each case pairs one or more operation names with a mapping pattern, so a
# case only matches when args is a mapping that contains the named key.
match op, args:
case "scene-by-url" | "scene-by-query-fragment", {"url": url}:
result = scene_from_url(url)
# The guard rejects an empty name, which then falls through to the error case.
case "scene-by-name", {"name": name} if name:
result = scene_search(name)
case "scene-by-fragment", {"title": title}:
result = scene_from_fragment(title)
case "performer-by-url" | "performer-by-fragment", {"url": url}:
result = performer_from_url(url)
case "performer-by-name", {"name": name}:
result = performer_search(name)
case "movie-by-url", {"url": url}:
result = movie_from_url(url)
# Unknown operation, or a known one without its required argument.
case _:
log.error(f"Operation: {op}, arguments: {json.dumps(args)}")
sys.exit(1)
# A result of None is printed as the JSON literal null.
print(json.dumps(result))

View File

@@ -0,0 +1,56 @@
# Scraper definition for Jellyfin: every action below runs Jellyfin.py through
# python, passing the operation name as the script's argument.
name: Jellyfin
# requires: py_common
sceneByURL:
- action: script
# The anchor lets performerByURL and movieByURL below reuse this URL list.
url: &baseJellyfinURL
# Change this to your Jellyfin URL, same as in config.ini
- localhost:8096
script:
- python
- Jellyfin.py
- scene-by-url
sceneByFragment:
action: script
script:
- python
- Jellyfin.py
- scene-by-fragment
sceneByName:
action: script
script:
- python
- Jellyfin.py
- scene-by-name
sceneByQueryFragment:
action: script
script:
- python
- Jellyfin.py
- scene-by-query-fragment
performerByName:
action: script
script:
- python
- Jellyfin.py
- performer-by-name
performerByURL:
- action: script
# Same URL list as sceneByURL (alias of the anchor defined there).
url: *baseJellyfinURL
script:
- python
- Jellyfin.py
- performer-by-url
performerByFragment:
action: script
script:
- python
- Jellyfin.py
- performer-by-fragment
movieByURL:
- action: script
# Same URL list as sceneByURL (alias of the anchor defined there).
url: *baseJellyfinURL
script:
- python
- Jellyfin.py
- movie-by-url
# Last Updated January 30, 2024

View File

@@ -0,0 +1,10 @@
# Package manifest for the Jellyfin scraper; `files` lists what the package ships.
id: Jellyfin
name: Jellyfin
metadata: {}
version: c76b223
date: "2024-01-31 17:58:58"
# NOTE(review): Jellyfin.py imports py_common and Jellyfin.yml carries a
# "# requires: py_common" comment, yet this list is empty - confirm whether
# the dependency should be declared here.
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- Jellyfin.py
- Jellyfin.yml