import re
import requests
import sys
import json
from urllib.parse import urlparse, urlencode, quote
from datetime import timedelta

import py_common.log as log
from py_common.config import get_config
from py_common.util import scraper_args
from py_common.types import ScrapedPerformer, ScrapedScene, ScrapedMovie


# DO NOT EDIT THIS FILE
# run the scraper once and edit the config.ini file instead
config = get_config(
    default="""# Jellyfin endpoint, change this if Jellyfin is not running on the same server as Stash
host = http://localhost:8096/

# Jellyfin API Key: generate one in Jellyfin Dashboard -> Advanced -> API Keys
api_key = xxxxxxxxxxxxxxxxxxxxxxxx

# Jellyfin user id: copy it from the URL of the user under Jellyfin -> Admin -> Users,
# for example: http://localhost:8096/web/index.html#!/myprofile.html?userId=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
userid = xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
"""
)

errors = []

if "x" in config.api_key:
    errors.append("api_key")

if "x" in config.userid:
    errors.append("userid")

if errors:
    log.error(f"Please configure {' and '.join(errors)} in your config.ini")
    sys.exit(1)

base_params = {"api_key": config.api_key, "userid": config.userid}
base_url = urlparse(config.host)._replace(query=urlencode(base_params))
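
# Note: base_url keeps api_key/userid as its query string; the helpers below only swap
# in a different path via ._replace(path=...), so every request stays authenticated.
# With the placeholder host, an image URL ends up looking roughly like (illustrative only):
#   http://localhost:8096/Items/<item-id>/Images/Primary?api_key=...&userid=...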


def to_scraped_performer(item: dict) -> ScrapedPerformer:
    performer: ScrapedPerformer = {
        "name": item["Name"],
        "url": base_url._replace(path=f"Persons/{quote(item['Name'])}").geturl(),
        "images": [
            base_url._replace(path=f"Items/{item['Id']}/Images/Primary").geturl()
        ],
    }

    if birthdate := item.get("PremiereDate"):
        performer["birthdate"] = birthdate[:10]

    if aliases := item.get("OriginalTitle"):
        performer["aliases"] = aliases

    if country := item.get("ProductionLocations"):
        performer["country"] = country[0].split(",")[-1].strip()

    if tags := item.get("Tags"):
        performer["tags"] = [{"name": tag} for tag in tags]

    return performer
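
# Illustrative example for the country handling above: a ProductionLocations entry
# like "Hollywood, California, USA" reduces to "USA" via split(",")[-1].strip().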


def to_scraped_scene(item: dict) -> ScrapedScene:
    scene: ScrapedScene = {
        "title": item["Name"],
        "url": base_url._replace(path=f"Items/{item['Id']}").geturl(),
        "image": base_url._replace(path=f"Items/{item['Id']}/Images/Primary").geturl(),
    }

    if date := item.get("PremiereDate"):
        scene["date"] = date[:10]

    if details := item.get("Overview"):
        scene["details"] = details

    if tags := item.get("Genres"):
        scene["tags"] = [{"name": tag} for tag in tags]

    if studios := item.get("Studios"):
        scene["studio"] = {"name": studios[0]["Name"]}

    if performers := item.get("People"):
        scene["performers"] = [
            {"name": performer["Name"]}
            for performer in performers
            if performer["Type"] == "Actor"
        ]

    if urls := item.get("ExternalUrls"):
        for url in urls:
            if url["Name"] in ("IMDb", "TheMovieDb"):
                scene["url"] = url["Url"]

    return scene
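
# Sketch of the minimal output (hypothetical item data): an item containing only
#   {"Name": "Example Movie", "Id": "abc123"}
# yields a scene with just "title", "url" and "image" set; the optional fields above
# are only added when Jellyfin returns them. Also note the ExternalUrls loop does not
# break, so if both an IMDb and a TheMovieDb link exist, the last one wins as "url".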


def to_scraped_movie(item: dict) -> ScrapedMovie:
    movie: ScrapedMovie = {
        "name": item["Name"],
        "front_image": base_url._replace(
            path=f"Items/{item['Id']}/Images/Primary"
        ).geturl(),
    }

    if duration := item.get("RunTimeTicks"):
        runtime = timedelta(seconds=int(duration / 10000000))
        movie["duration"] = str(runtime)

    if date := item.get("PremiereDate"):
        movie["date"] = date[:10]
    elif date := item.get("ProductionYear"):
        movie["date"] = str(date)

    if details := item.get("Overview"):
        movie["synopsis"] = details

    if people := item.get("People"):
        for person in people:
            if person["Type"] == "Director":
                movie["director"] = person["Name"]
                break

    if studios := item.get("Studios"):
        movie["studio"] = {
            "name": studios[0]["Name"],
        }

    if rating := item.get("CriticRating"):
        movie["rating"] = str(rating)
    elif rating := item.get("CommunityRating"):
        movie["rating"] = str(rating)

    return movie
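
# RunTimeTicks are .NET-style ticks (100 ns units), so dividing by 10,000,000 gives
# seconds; e.g. 45_000_000_000 ticks -> 4500 s -> "1:15:00" via str(timedelta(...)).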


def scene_from_url(url: str) -> ScrapedScene | None:
    # If users paste their Jellyfin URL we can extract the ID from it
    if match := re.search(r"(?<!user)id=([a-f0-9]+)", url):
        movie_id = match.group(1)
        log.debug(f"Converting from web UI URL using ID {movie_id}")
        url = base_url._replace(path=f"Users/{config.userid}/Items/{movie_id}").geturl()
    else:
        url = urlparse(url)._replace(query=urlencode(base_params)).geturl()

    log.debug(f"Getting scene through URL '{url}'")
    res = requests.get(url)

    scene = res.json()
    if not scene.get("Name"):
        log.warning(
            "Failed to scrape from URL: make sure this scene is in your library"
        )
        return None

    log.debug(f"Found scene '{scene['Name']}'")

    return to_scraped_scene(scene)
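
# The (?<!user)id= lookbehind above grabs the item id from web-UI URLs, which
# typically look like http://host/web/index.html#!/details?id=<hex>&serverId=<hex>,
# while skipping the userId= parameter.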


def scene_search(title: str) -> list[ScrapedScene]:
    # Strip the extension if we get a file name
    if "." in title:
        scenename = title.rsplit(".", 1)[0]
    else:
        scenename = title

    search_params = {
        "searchTerm": scenename,
        "Limit": 10,
        "IncludeItemTypes": "Movie",
        "Fields": "Overview,Genres,Studios,People,ExternalUrls,PremiereDate",
        "Recursive": True,
        **base_params,
    }

    search_url = base_url._replace(
        path="Items", query=urlencode(search_params)
    ).geturl()
    log.debug(f"Querying URL: {search_url}")

    res = requests.get(search_url)
    result = res.json()

    scenes = [to_scraped_scene(item) for item in result["Items"]]
    # In order for scene-by-query-fragment to scrape these scenes, we need to
    # add their Jellyfin URLs to the scene object: users can rescrape to get a TMDb URL
    scrapable_urls = [
        base_url._replace(path=f"Users/{config.userid}/Items/{item['Id']}").geturl()
        for item in result["Items"]
    ]
    scrapable_scenes = [
        scene | {"url": url} for scene, url in zip(scenes, scrapable_urls)
    ]

    log.debug(f"Found {len(scenes)} scenes")
    return scrapable_scenes
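
# For reference (values are illustrative), the search above issues a request like:
#   GET /Items?searchTerm=<title>&Limit=10&IncludeItemTypes=Movie&Recursive=True&...
# Recursive makes /Items search the whole library rather than just top-level folders.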


def scene_from_fragment(title: str) -> ScrapedScene | None:
    log.debug(f"Getting scene through fragment title: '{title}'")

    scenes = scene_search(title)

    if not scenes:
        log.debug(f"Didn't find {title} in fragment mode")
        return None

    found = scenes[0]
    log.debug(f"Found '{found['title']}'")  # type: ignore (title will be set)
    return found
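
# Fragment matching simply trusts the first search hit; if Jellyfin returns several
# candidates, only scenes[0] is used.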


def performer_from_url(url: str) -> ScrapedPerformer | None:
    log.debug(f"Getting performer from URL: '{url}'")

    performer_url = urlparse(url)
    res = requests.get(performer_url._replace(query=urlencode(base_params)).geturl())
    person = res.json()

    if "Name" not in person:
        log.warning(f"Didn't find {url} in URL mode")
        return None

    log.debug(f"Found performer '{person['Name']}'")
    performer = to_scraped_performer(person)
    return performer
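
# Whatever performer URL was stored (typically the /Persons/<name> URL produced by
# to_scraped_performer), the current api_key/userid query string is re-applied before
# the request, so stale or stripped credentials in the saved URL do not matter.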


def performer_search(query: str) -> list[ScrapedPerformer]:
    log.debug(f"Searching for performer '{query}'")
    search_params = {
        "searchTerm": query,
        "fields": "OriginalTitle,ProductionLocations,PremiereDate,Tags,ExternalUrls",
        **base_params,
    }

    get_url = base_url._replace(path="Persons", query=urlencode(search_params))
    log.debug(f"Querying URL: {get_url}")
    res = requests.get(get_url.geturl())

    search_result = res.json()
    performers = [to_scraped_performer(item) for item in search_result["Items"]]

    log.debug(f"Found {len(performers)} performers")
    return performers


def movie_from_url(url: str) -> ScrapedMovie | None:
    # If users paste their Jellyfin URL we can extract the ID from it
    if match := re.search(r"(?<!user)id=([a-f0-9]+)", url):
        movie_id = match.group(1)
        log.debug(f"Converting from web UI URL using ID {movie_id}")
        url = base_url._replace(path=f"Users/{config.userid}/Items/{movie_id}").geturl()
    else:
        # Mirror scene_from_url: make sure the api_key/userid parameters are attached
        url = urlparse(url)._replace(query=urlencode(base_params)).geturl()

    log.debug(f"Getting movie through URL '{url}'")
    res = requests.get(url)
    result = res.json()

    if not result.get("Name"):
        log.warning(
            "Failed to scrape from URL: make sure this movie is in your library"
        )
        return None

    return to_scraped_movie(result)


if __name__ == "__main__":
    op, args = scraper_args()

    result = None

    match op, args:
        case "scene-by-url" | "scene-by-query-fragment", {"url": url}:
            result = scene_from_url(url)
        case "scene-by-name", {"name": name} if name:
            result = scene_search(name)
        case "scene-by-fragment", {"title": title}:
            result = scene_from_fragment(title)
        case "performer-by-url" | "performer-by-fragment", {"url": url}:
            result = performer_from_url(url)
        case "performer-by-name", {"name": name}:
            result = performer_search(name)
        case "movie-by-url", {"url": url}:
            result = movie_from_url(url)
        case _:
            log.error(f"Operation: {op}, arguments: {json.dumps(args)}")
            sys.exit(1)

    print(json.dumps(result))
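
# Rough manual test, assuming py_common's scraper_args reads the operation from argv
# and the JSON fragment from stdin (file name is illustrative):
#   echo '{"name": "Some Movie"}' | python jellyfin.py scene-by-name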