stash

2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions
--- a/stash/config/scrapers/community/torrent/torrent.py
+++ b/stash/config/scrapers/community/torrent/torrent.py
@@ -0,0 +1,150 @@
+import sys
+import json
+import os
+from pathlib import Path
+import re
+from datetime import datetime
+import difflib
+
+# py_common is imported from the parent directory (expected to be the
+# "scrapers" directory), so that directory must be on sys.path first.
+csd = os.path.dirname(os.path.realpath(__file__))  # get current script directory
+parent = os.path.dirname(csd)  # parent directory (should be the scrapers one)
+sys.path.append(parent)
+
+try:
+    from bencoder import bdecode
+except ModuleNotFoundError:
+    print("You need to install the 'bencoder.pyx' module. (https://pypi.org/project/bencoder.pyx/)", file=sys.stderr)
+    # A bare sys.exit() reports success (status 0); exit non-zero so the
+    # caller can tell that the scraper could not run.
+    sys.exit(1)
+
+try:
+    from py_common import graphql
+except ModuleNotFoundError:
+    print("You need to download the folder 'py_common' from the community repo! "
+          "(CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr)
+    sys.exit(1)
+
+# Directory searched for .torrent files. The path is relative, so it is
+# resolved against the current working directory of the process.
+TORRENTS_PATH = Path("torrents")
+
+
+def get_scene_data(fragment_data):
+    """Collect the id, title and file list of the scene in *fragment_data*.
+
+    Reads ``id`` and ``title`` from the fragment and asks GraphQL for the
+    scene's files.
+
+    Returns a dict ``{"id", "title", "files"}`` where every file is
+    ``{"filename": <basename of the file path>, "size": <size>}``, or an
+    empty dict when the query returns nothing or no scene is found.
+    """
+    query = """
+    query FileInfoBySceneId($id: ID) {
+      findScene(id: $id) {
+        files {
+          path
+          size
+        }
+      }
+    }"""
+    scene_id = fragment_data["id"]
+    scene_title = fragment_data["title"]
+
+    response = graphql.callGraphQL(query, {"id": scene_id})
+    if not response or not response["findScene"]:
+        return {}
+
+    files = [
+        {"filename": os.path.basename(entry["path"]), "size": entry["size"]}
+        for entry in response["findScene"]["files"]
+    ]
+    return {"id": scene_id, "title": scene_title, "files": files}
+
+def process_tags_performers(tagList):
+    """Lazily turn raw tag byte strings into space-separated names.
+
+    Each tag is decoded with ``decode_bytes`` and every ``.`` in it is
+    replaced by a space. Returns a ``map`` iterator, not a list.
+    """
+    def to_name(raw_tag):
+        return decode_bytes(raw_tag).replace('.', ' ')
+
+    return map(to_name, tagList)
+
+def process_description_bbcode(description):
+    """Strip BBCode markup from *description* and return plain text.
+
+    Image tags are removed together with their content, formatting tags
+    (b, i, u, s, url, quote) are replaced by the text they wrap, any other
+    bracketed tag is dropped, and runs of three or more line-break
+    characters are collapsed into one blank line. The result is stripped of
+    leading and trailing whitespace.
+    """
+    # Remove image tags, including the image URL between them
+    res = re.sub(r'\[img\]([^\[]*)\[\/img\]', r"", description)
+
+    # Remove bbcode & replace with the contained text
+    res = re.sub(r'\[.*?\]([^\[]*)\[\/(?:b|i|u|s|url|quote)\]', r"\1", res)
+
+    # Cleanup any bbcode tags that may have been left behind
+    res = re.sub(r'\[.*?\]', r'', res)
+
+    # Remove excessive newlines. Inside a character class "|" is a literal
+    # pipe, not alternation, so it must not be listed here or runs of pipes
+    # in the text would be turned into newlines.
+    res = re.sub(r'[\r\n]{3,}', '\r\n\r\n', res)
+    return res.strip()
+
+def get_torrent_metadata(torrent_data):
+    """Build the scraper result dict from a decoded torrent.
+
+    Copies ``title``, ``cover url`` (as ``image``), ``description`` (as
+    ``details``, with BBCode removed) and ``taglist`` (as both ``tags`` and
+    ``performers``) from the torrent's ``metadata`` dict, then adds the
+    torrent's ``comment`` as ``url`` and its ``creation date`` as ``date``
+    (``YYYY-MM-DD``).
+
+    Returns an empty dict when the torrent has no ``metadata`` key.
+    """
+    if b"metadata" not in torrent_data:
+        return {}
+
+    metadata = torrent_data[b"metadata"]
+    result = {}
+
+    if b"title" in metadata:
+        result["title"] = decode_bytes(metadata[b"title"])
+    if b"cover url" in metadata:
+        result["image"] = decode_bytes(metadata[b"cover url"])
+    if b"description" in metadata:
+        result["details"] = process_description_bbcode(decode_bytes(metadata[b"description"]))
+    if b"taglist" in metadata:
+        raw_tags = metadata[b"taglist"]
+        # The same tag list feeds both fields: tags keep the raw name,
+        # performers get dots replaced by spaces.
+        result["tags"] = [{"name": decode_bytes(tag)} for tag in raw_tags]
+        result["performers"] = [{"name": performer} for performer in process_tags_performers(raw_tags)]
+
+    # NOTE(review): these two are top-level torrent keys but are only read
+    # when "metadata" is present - confirm that this is intended.
+    if b"comment" in torrent_data:
+        result["url"] = decode_bytes(torrent_data[b"comment"])
+    if b"creation date" in torrent_data:
+        result["date"] = datetime.fromtimestamp(torrent_data[b"creation date"]).strftime("%Y-%m-%d")
+    return result
+
+
+def decode_bytes(s, encodings=("utf-8", "latin-1")):
+    """Decode *s* with the first encoding in *encodings* that succeeds.
+
+    When every candidate raises ``UnicodeDecodeError`` the bytes are decoded
+    as UTF-8 with undecodable bytes dropped.
+    """
+    for codec in encodings:
+        try:
+            text = s.decode(codec)
+        except UnicodeDecodeError:
+            # Not valid in this encoding; move on to the next candidate.
+            continue
+        else:
+            return text
+    return s.decode("utf-8", "ignore")
+
+
+def scene_in_torrent(scene_data, torrent_data):
+    """Tell whether any file of the scene is contained in the torrent.
+
+    A scene file matches when its filename is a substring of the torrent
+    entry's name and the sizes are equal. Single-file torrents (``length``
+    directly under ``info``) are compared against ``info.name``; multi-file
+    torrents against the last path component of every entry in
+    ``info.files``.
+
+    Returns ``True`` on the first match and ``False`` otherwise.
+    """
+    for scene in scene_data["files"]:
+        # Looked up inside the loop so a scene without files never touches
+        # the torrent dict.
+        info = torrent_data[b"info"]
+        if b"length" in info:
+            if scene["filename"] in decode_bytes(info[b"name"]) and info[b"length"] == scene["size"]:
+                return True
+        elif b"files" in info:
+            for entry in info[b"files"]:
+                if scene["filename"] in decode_bytes(entry[b"path"][-1]) and entry[b"length"] == scene["size"]:
+                    return True
+    # Explicit negative result instead of falling off the end with None.
+    return False
+
+
+def process_torrents(scene_data):
+    """Return metadata from the first torrent that contains the scene.
+
+    Every ``*.torrent`` file directly inside ``TORRENTS_PATH`` is decoded and
+    checked with ``scene_in_torrent``. Returns an empty dict when
+    *scene_data* is empty or no torrent matches.
+    """
+    if not scene_data:
+        return {}
+
+    for torrent_path in TORRENTS_PATH.glob("*.torrent"):
+        torrent_data = bdecode(torrent_path.read_bytes())
+        if scene_in_torrent(scene_data, torrent_data):
+            return get_torrent_metadata(torrent_data)
+    return {}
+
+def similarity_file_name(search, fileName):
+    """Return the case-insensitive similarity ratio of the two strings.
+
+    The value is ``difflib.SequenceMatcher.ratio()`` of the lower-cased
+    inputs: 1.0 for identical strings, 0.0 when nothing matches.
+    """
+    needle = search.lower()
+    candidate = fileName.lower()
+    return difflib.SequenceMatcher(None, needle, candidate).ratio()
+
+def cleanup_name(name):
+    """Turn a torrent path into a bare display name.
+
+    Converts *name* to a string, removes a leading ``torrents`` directory
+    component and the trailing ``.torrent`` extension.
+    """
+    ret = str(name)
+    # str(Path) uses "\" on Windows and "/" elsewhere; accept both so the
+    # directory prefix is removed on every platform.
+    for prefix in ("torrents\\", "torrents/"):
+        if ret.startswith(prefix):
+            ret = ret[len(prefix):]
+            break
+    return ret.removesuffix(".torrent")
+
+# Command-line entry point: the first argument selects the mode, the JSON
+# payload is read from stdin and the JSON result is printed to stdout.
+if sys.argv[1] == "query":
+    # Match the scene's files against the torrents in TORRENTS_PATH.
+    fragment = json.loads(sys.stdin.read())
+    print(json.dumps(process_torrents(get_scene_data(fragment))))
+elif sys.argv[1] == "fragment":
+    # The payload's "url" is the path of a torrent file to read metadata from.
+    filename = json.loads(sys.stdin.read()).get('url')
+    with open(filename, 'rb') as f:
+        torrent_data = bdecode(f.read())
+        print(json.dumps(get_torrent_metadata(torrent_data)))
+elif sys.argv[1] == "search":
+    search = json.loads(sys.stdin.read()).get('name')
+    # Collect (distance, entry) pairs in a list: keying a dict by the rounded
+    # distance would silently drop torrents that share the same score.
+    candidates = []
+    for t in TORRENTS_PATH.rglob('*.torrent'):
+        clean_t = cleanup_name(t)
+        distance = round(10000 * (1 - similarity_file_name(search, clean_t)))
+        candidates.append((distance, {'url': str(t.absolute()), 'title': clean_t}))
+
+    # Closest matches first. Sorting on the distance alone keeps the sort
+    # stable for ties and never compares the entry dicts.
+    candidates.sort(key=lambda pair: pair[0])
+
+    # Only the five best matches are returned.
+    print(json.dumps([entry for _, entry in candidates[:5]]))
+
+# Last Updated June 12, 2023