stash
This commit is contained in:
150
stash/config/scrapers/community/torrent/torrent.py
Normal file
150
stash/config/scrapers/community/torrent/torrent.py
Normal file
@@ -0,0 +1,150 @@
|
||||
import sys
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
import re
|
||||
from datetime import datetime
|
||||
import difflib
|
||||
|
||||
# to import from a parent directory we need to add that directory to the system path
|
||||
csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory
|
||||
parent = os.path.dirname(csd) # parent directory (should be the scrapers one)
|
||||
sys.path.append(
|
||||
parent
|
||||
) # add parent dir to sys path so that we can import py_common from there
|
||||
|
||||
try:
|
||||
from bencoder import bdecode
|
||||
except ModuleNotFoundError:
|
||||
print("You need to install the 'bencoder.pyx' module. (https://pypi.org/project/bencoder.pyx/)", file=sys.stderr)
|
||||
sys.exit()
|
||||
|
||||
try:
|
||||
from py_common import graphql
|
||||
except ModuleNotFoundError:
|
||||
print("You need to download the folder 'py_common' from the community repo! "
|
||||
"(CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr)
|
||||
sys.exit()
|
||||
|
||||
# Directory that is searched for .torrent files. The path is relative, so it
# is resolved against the working directory of the running process.
TORRENTS_PATH = Path("torrents")
|
||||
|
||||
|
||||
def get_scene_data(fragment_data):
    """Collect the id, title and file list of the scene named by a fragment.

    Args:
        fragment_data: Mapping that provides the keys ``"id"`` and ``"title"``.

    Returns:
        A dict ``{"id", "title", "files"}`` where ``files`` is a list of
        ``{"filename", "size"}`` dicts built from the GraphQL response
        (``filename`` is the basename of each reported path). An empty dict
        is returned when the query yields nothing or no scene is found.

    Raises:
        KeyError: If ``fragment_data`` lacks ``"id"`` or ``"title"``.
    """
    scene_id = fragment_data["id"]
    scene_title = fragment_data["title"]

    query = """
    query FileInfoBySceneId($id: ID) {
      findScene(id: $id) {
        files {
          path
          size
        }
      }
    }"""
    result = graphql.callGraphQL(query, {"id": scene_id})

    # Nothing usable came back: signal "no data" with an empty dict.
    if not result or not result["findScene"]:
        return {}

    files = [
        {"filename": os.path.basename(entry["path"]), "size": entry["size"]}
        for entry in result["findScene"]["files"]
    ]
    return {"id": scene_id, "title": scene_title, "files": files}
|
||||
|
||||
def process_tags_performers(tagList):
    """Lazily convert raw tag byte strings into performer-style names.

    Each tag is decoded with ``decode_bytes`` and every ``.`` in it is
    replaced by a space.

    Returns:
        A ``map`` iterator over the converted names; it can be consumed once.
    """
    def to_name(raw_tag):
        return decode_bytes(raw_tag).replace('.', ' ')

    return map(to_name, tagList)
|
||||
|
||||
def process_description_bbcode(description):
    """Strip BBCode markup from a torrent description.

    Args:
        description: Description text that may contain BBCode tags.

    Returns:
        The text with ``[img]`` blocks removed, paired formatting tags
        replaced by the text they enclose, any remaining bracketed tags
        dropped, runs of three or more line-break characters collapsed to a
        single blank line (``\\r\\n\\r\\n``), and surrounding whitespace
        stripped.
    """
    # Remove image tags together with the URL they contain.
    res = re.sub(r'\[img\]([^\[]*)\[\/img\]', r"", description)

    # Remove bbcode & replace with the contained text.
    res = re.sub(r'\[.*?\]([^\[]*)\[\/(?:b|i|u|s|url|quote)\]', r"\1", res)

    # Cleanup any bbcode tags that may have been left behind.
    res = re.sub(r'\[.*?\]', r'', res)

    # Remove excessive newlines. Inside a character class "|" is a literal
    # pipe, not alternation, so it must not be listed here: otherwise text
    # such as "|||" would be turned into blank lines.
    res = re.sub(r'[\r\n]{3,}', '\r\n\r\n', res)
    return res.strip()
|
||||
|
||||
def get_torrent_metadata(torrent_data):
    """Translate a decoded torrent into a scraper result dict.

    Args:
        torrent_data: Decoded torrent mapping with byte-string keys.

    Returns:
        A dict holding whichever of these keys the torrent provides:
        ``title``, ``image``, ``details``, ``tags`` and ``performers``
        (all read from the ``b"metadata"`` entry), ``url`` (from
        ``b"comment"``) and ``date`` (from ``b"creation date"``, formatted
        as ``YYYY-MM-DD``). Keys with no source value are omitted.
    """
    result = {}

    if b"metadata" in torrent_data:
        meta = torrent_data[b"metadata"]

        if b"title" in meta:
            result["title"] = decode_bytes(meta[b"title"])

        if b"cover url" in meta:
            result["image"] = decode_bytes(meta[b"cover url"])

        if b"description" in meta:
            raw_description = decode_bytes(meta[b"description"])
            result["details"] = process_description_bbcode(raw_description)

        if b"taglist" in meta:
            raw_tags = meta[b"taglist"]
            # The same tag list feeds both fields: tags keep the decoded
            # text, performer names additionally have dots turned into spaces.
            result["tags"] = [{"name": decode_bytes(tag)} for tag in raw_tags]
            result["performers"] = [
                {"name": performer}
                for performer in process_tags_performers(raw_tags)
            ]

    if b"comment" in torrent_data:
        result["url"] = decode_bytes(torrent_data[b"comment"])

    if b"creation date" in torrent_data:
        # fromtimestamp() without a tz argument yields local time.
        created = datetime.fromtimestamp(torrent_data[b"creation date"])
        result["date"] = created.strftime("%Y-%m-%d")

    return result
|
||||
|
||||
|
||||
def decode_bytes(s, encodings=("utf-8", "latin-1")):
    """Decode a byte string, trying several encodings in order.

    Args:
        s: The bytes to decode.
        encodings: Encoding names to try, in order of preference.

    Returns:
        The text produced by the first encoding that decodes ``s`` without a
        ``UnicodeDecodeError``. If every encoding fails, ``s`` is decoded as
        UTF-8 with undecodable bytes dropped.
    """
    for codec in encodings:
        try:
            text = s.decode(codec)
        except UnicodeDecodeError:
            # This codec cannot represent the data; move on to the next one.
            continue
        return text
    return s.decode("utf-8", "ignore")
|
||||
|
||||
|
||||
def scene_in_torrent(scene_data, torrent_data):
    """Report whether any file of the scene is contained in the torrent.

    A scene file matches a torrent entry when the byte sizes are equal and
    the scene's filename is a substring of the entry's decoded name. For a
    single-file torrent the entry is the torrent's ``b"name"``/``b"length"``;
    for a multi-file torrent each item of ``b"files"`` is an entry and its
    name is the last element of ``b"path"``.

    Args:
        scene_data: Dict with a ``"files"`` list of ``{"filename", "size"}``.
        torrent_data: Decoded torrent mapping with byte-string keys.

    Returns:
        ``True`` if at least one scene file matches, otherwise ``False``
        (including when the torrent has no usable ``b"info"`` section).
    """
    info = torrent_data.get(b"info") or {}

    # Normalise both torrent layouts into (raw_name, length) pairs once,
    # instead of re-reading the torrent structure for every scene file.
    if b"length" in info:
        entries = [(info[b"name"], info[b"length"])]
    elif b"files" in info:
        entries = [(item[b"path"][-1], item[b"length"]) for item in info[b"files"]]
    else:
        return False

    for scene_file in scene_data["files"]:
        for raw_name, length in entries:
            # Compare sizes first: it is cheap and avoids decoding the name
            # of every entry that cannot match anyway.
            if length == scene_file["size"] and scene_file["filename"] in decode_bytes(raw_name):
                return True
    return False
|
||||
|
||||
|
||||
def process_torrents(scene_data):
    """Find the first torrent that contains the scene and return its metadata.

    Args:
        scene_data: Scene description as consumed by ``scene_in_torrent``;
            a falsy value means there is nothing to look up.

    Returns:
        The result of ``get_torrent_metadata`` for the first ``*.torrent``
        file directly inside ``TORRENTS_PATH`` that matches the scene, or an
        empty dict when ``scene_data`` is falsy or no torrent matches.
    """
    if not scene_data:
        return {}

    for torrent_path in TORRENTS_PATH.glob("*.torrent"):
        with open(torrent_path, "rb") as handle:
            decoded = bdecode(handle.read())
        if scene_in_torrent(scene_data, decoded):
            return get_torrent_metadata(decoded)

    return {}
|
||||
|
||||
def similarity_file_name(search, fileName):
    """Return the case-insensitive similarity of two strings.

    Both strings are lower-cased and compared with
    ``difflib.SequenceMatcher``; the result is its ``ratio()``, a float
    between 0.0 and 1.0.
    """
    needle = search.lower()
    candidate = fileName.lower()
    return difflib.SequenceMatcher(a=needle, b=candidate).ratio()
|
||||
|
||||
def cleanup_name(name):
    """Turn a torrent file path into a display title.

    Args:
        name: Path (or string) of a torrent file.

    Returns:
        ``str(name)`` without a leading ``torrents`` directory component and
        without the ``.torrent`` suffix. Both ``\\`` and ``/`` are accepted as
        the separator after ``torrents``, so the result is the same on
        Windows and POSIX systems.
    """
    ret = str(name)
    # str(Path) uses the platform's separator, so check both spellings and
    # strip at most one leading prefix.
    for prefix in ("torrents\\", "torrents/"):
        if ret.startswith(prefix):
            ret = ret[len(prefix):]
            break
    return ret.removesuffix(".torrent")
|
||||
|
||||
def select_best_matches(candidates, limit=5):
    """Return the entries of the highest-scoring candidates.

    Args:
        candidates: Iterable of ``(score, entry)`` pairs; a higher score
            means a better match.
        limit: Maximum number of entries to return.

    Returns:
        A list with the entries of the ``limit`` best candidates, best first.
        Candidates with equal scores are all kept and stay in input order.
    """
    # A list (not a dict keyed by score) is used so that candidates with the
    # same score cannot overwrite each other.
    ranked = sorted(candidates, key=lambda pair: pair[0], reverse=True)
    return [entry for _, entry in ranked[:limit]]


def main(argv):
    """Run the scraper in the mode given by ``argv[1]``.

    Modes (input is read as JSON from stdin, output is printed as JSON):
        query:    look up the scene fragment among the local torrents.
        fragment: read the torrent file named by the ``url`` field.
        search:   rank local torrents by name similarity to the ``name``
                  field and print the five best matches.

    Returns:
        Process exit status: 0 on success, 1 for a missing or unknown mode.
    """
    if len(argv) < 2:
        print("Missing mode argument (expected 'query', 'fragment' or 'search').", file=sys.stderr)
        return 1

    mode = argv[1]
    if mode == "query":
        fragment = json.loads(sys.stdin.read())
        print(json.dumps(process_torrents(get_scene_data(fragment))))
    elif mode == "fragment":
        filename = json.loads(sys.stdin.read()).get('url')
        with open(filename, 'rb') as f:
            torrent_data = bdecode(f.read())
        print(json.dumps(get_torrent_metadata(torrent_data)))
    elif mode == "search":
        search = json.loads(sys.stdin.read()).get('name')
        candidates = []
        for t in TORRENTS_PATH.rglob('*.torrent'):
            clean_t = cleanup_name(t)
            score = similarity_file_name(search, clean_t)
            candidates.append((score, {'url': str(t.absolute()), 'title': clean_t}))
        print(json.dumps(select_best_matches(candidates)))
    else:
        print(f"Unknown mode '{mode}' (expected 'query', 'fragment' or 'search').", file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||
|
||||
# Last Updated June 12, 2023
|
||||
Reference in New Issue
Block a user