stash
This commit is contained in:
@@ -0,0 +1,267 @@
|
||||
import json
import os
import re
import sys
from datetime import datetime

# to import from a parent directory we need to add that directory to the system path
csd = os.path.dirname(os.path.realpath(__file__))  # get current script directory
parent = os.path.dirname(csd)  # parent directory (should be the scrapers one)
sys.path.append(
    parent
)  # add parent dir to sys path so that we can import py_common from there
|
||||
|
||||
# Third-party dependency guards: fail with an actionable message instead of a
# raw traceback when a required module is missing.
try:
    import cloudscraper
except ModuleNotFoundError:
    print("You need to install the cloudscraper module. (https://pypi.org/project/cloudscraper/)", file=sys.stderr)
    print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install cloudscraper", file=sys.stderr)
    # CONSISTENCY FIX: was sys.exit() (exit status 0); a missing dependency is
    # an error, so exit nonzero like the py_common guard below.
    sys.exit(1)

try:
    import requests
except ModuleNotFoundError:
    print("You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", file=sys.stderr)
    print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", file=sys.stderr)
    sys.exit(1)  # was sys.exit(); exit nonzero on failure

try:
    from lxml import html
except ModuleNotFoundError:
    print("You need to install the lxml module. (https://lxml.de/installation.html#installation)", file=sys.stderr)
    print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install lxml", file=sys.stderr)
    sys.exit(1)  # was sys.exit(); exit nonzero on failure

try:
    import py_common.log as log
except ModuleNotFoundError:
    print(
        "You need to download the folder 'py_common' from the community repo (CommunityScrapers/tree/master/scrapers/py_common)",
        file=sys.stderr)
    sys.exit(1)
|
||||
|
||||
# --------------------------------------

# This is a scraper for: animecharactersdatabase.com
#
# AnimeCharactersDatabase includes characters from:
# Anime, Hentai, (Mobile) Games, Eroge, Virtual Idols/YouTubers, Vocaloid
#
# These fields will be populated if available:
# Name, Gender, Birthdate, Country, Hair Color, Eye Color, Height, Measurements, URL, Details, Tags, Image
#
# A number of additional tags can be configured below.

# ---------------------------------------
# ---------- Tag Configuration ----------
# ---------------------------------------

# Maximum number of search results (between 1 and 30).
# Search by name includes the franchise for each result to make it easier to choose the correct one.
# Some (non ascii, very short) names require querying the API individually to get the franchise for each result.
# This might get you banned, since the API is rate limited.
# See: http://wiki.animecharactersdatabase.com/index.php?title=API_Access
limit = 15

# Prefix for performer tags.
prefix = "performer:"

# List of additional tags applied to every scraped performer. Set to [] for none.
additional_tags = [{"name": "fictional"}]  # []

# Tags mostly include appearance indicators like: ahoge, dress, hat, twintails, etc.
include_tag = True
tag_prefix = prefix

# Scrape the source material as tag (name of anime/game): Kantai Collection, Idolmaster: Cinderella Girls, etc.
include_parody = True
parody_prefix = "parody:"

# Scrape Zodiac Sign as tag: Libra ♎, Sagittarius ♐, etc.
include_sign = True
sign_prefix = prefix + "sign:"

# Scrape race of non-human characters as tag: Orc, Elf, etc.
include_race = True
race_prefix = prefix + "race:"

# Scrape ship class of ship girls as tag (kancolle, etc.): Destroyer, etc.
include_ship_class = True
ship_class_prefix = prefix + "ship:"

# Scrape blood type as tag: A, B, etc.
include_blood_type = True
blood_type_prefix = prefix + "Blood Type "

# Scrape apparent age as tag: Adult, Teen, etc.
# Might differ from canonical age.
# Canonical age will be ignored, since it would result in too many tags.
# Birthdate is sometimes available, but the resulting calculated age represents neither canonical age nor apparent age.
include_apparent_age = True
apparent_age_prefix = prefix + "Apparent "

# Scrape Hair Length as tag: To Shoulders, To Neck, Past Waist, etc.
include_hair_length = True
hair_length_prefix = prefix + "Hair "


# ---------------------------------------
# ---------------------------------------
# ---------------------------------------
|
||||
|
||||
def readJSONInput():
    """Read a JSON document from stdin (supplied by Stash) and return the parsed object."""
    # IDIOM FIX: the original bound the raw text to `input`, shadowing the builtin.
    raw = sys.stdin.read()
    return json.loads(raw)
|
||||
|
||||
|
||||
def scrapeURL(url):
    """Fetch *url* and return the response body parsed as an lxml HTML tree."""
    page_source = scrapeUrlToString(url)
    return html.fromstring(page_source)
|
||||
|
||||
|
||||
def scrapeUrlToString(url):
    """Fetch *url* through cloudscraper and return the raw response body (bytes).

    Exits the process (status 1) on a request failure or an HTTP status
    code >= 400, logging the reason via py_common.log.
    """
    scraper = cloudscraper.create_scraper()
    try:
        scraped = scraper.get(url)
    except Exception as e:
        # BUG FIX: was a bare `except:`, which also swallows SystemExit and
        # KeyboardInterrupt and hid the actual failure reason from the log.
        log.error(f"scrape error: {e}")
        sys.exit(1)

    if scraped.status_code >= 400:
        log.error('HTTP Error: %s' % scraped.status_code)
        sys.exit(1)

    return scraped.content
|
||||
|
||||
|
||||
def performerByName(query):
    """Search the site for *query* and return candidate performers.

    Each result is a dict with "name", "id" (numeric character id as a
    string) and "url" keys.
    """
    cleanedQuery = requests.utils.quote(query)
    url = f"https://www.animecharactersdatabase.com/searchall.php?in=characters&sq={cleanedQuery}"
    tree = scrapeURL(url)
    names = tree.xpath("//li/div[@class='tile3top']/a/text()")
    hrefs = tree.xpath("//li/div[@class='tile3top']/a/@href")

    # Pair each character name with its link; the href carries the numeric id.
    results = [
        {
            "name": character_name,
            "id": href.replace("characters.php?id=", ""),
            "url": "https://www.animecharactersdatabase.com/" + href,
        }
        for character_name, href in zip(names, hrefs)
    ]
    log.info(f"scraped {len(results)} results on: {url}")
    return results
|
||||
|
||||
|
||||
def addFranchise(query, results):
    """Append each result's source franchise to its display name.

    First queries the site's search API once for *query* and matches results
    by id; any result not found there falls back to one API call per
    character (rate limited — may get you banned). The "id" key is removed
    from each result before returning, since Stash does not expect it.
    """
    cleanedQuery = requests.utils.quote(query)
    url = f"https://www.animecharactersdatabase.com/api_series_characters.php?character_q={cleanedQuery}"
    data = json.loads(scrapeUrlToString(url))
    count1 = 0
    count2 = 0
    for result in results:
        try:
            # Try to find the franchise in API search results.
            # These results are ordered by alphabet and limited to 100,
            # so short queries might not include the correct result.
            # The API query also does not seem to support any Kanji.
            franchise = next(e["anime_name"] for e in data["search_results"] if str(e["id"]) == result["id"])
            count1 += 1
        except (StopIteration, KeyError, TypeError):
            # BUG FIX: was a bare `except:`. Only the lookup failures we
            # expect (no match, missing key, non-list payload) should trigger
            # the per-character fallback.
            # Use separate API calls as a backup.
            # This might get you banned, since the API is rate limited.
            franchise = apiGetCharacter(result["id"])["origin"]
            count2 += 1
        # Append franchise to character name for easier differentiation.
        result["name"] = f"{result['name']} ({franchise})"
        result.pop("id")
    log.debug(f"scraped {count1} franchises by single API call")
    log.debug(f"scraped {count2} franchises by separate API calls")
    return results
|
||||
|
||||
|
||||
def apiGetCharacter(id):
    """Fetch a single character record from the site API by its numeric id."""
    endpoint = f"https://www.animecharactersdatabase.com/api_series_characters.php?character_id={id}"
    payload = scrapeUrlToString(endpoint)
    return json.loads(payload)
|
||||
|
||||
|
||||
def performerByURL(url, result=None):
    """Scrape a character page at *url* into a Stash performer fragment dict.

    Populates name, details, image, tags, country, birthdate, measurements,
    height, gender, eye_color and hair_color where available. An existing
    dict may be passed as *result* to be filled in place.
    """
    # BUG FIX: the original declared `result={}` — a mutable default argument
    # shared across calls, so earlier scrapes leaked into later ones.
    if result is None:
        result = {}
    log.debug("performerByURL: " + url)
    tree = scrapeURL(url)
    result["url"] = url
    result["name"] = next(iter(tree.xpath(
        "//h3[@id='section001_summary']/following-sibling::p/a[contains(@href,'character')]/text()")), "").strip()
    result["details"] = "\n".join([s.strip() for s in tree.xpath(
        "//div[@style='padding: 0 15px 15px 15px; text-align: left;']/text()")])
    if not result["details"]:
        # Fallback: stitch the summary paragraph together; the regex drops the
        # stray space left before the final period.
        result["details"] = re.sub(" .$", ".", " ".join([s.strip() for s in tree.xpath(
            "//h3[@id='section001_summary']/following-sibling::p[contains(a/@href,'character')]//text()") if
            s.strip()]))
    result["image"] = next(iter(tree.xpath("//meta[@property='og:image']/@content")), "")

    # left table, works for link and plain text fields, return result list
    def parse_left(field):
        template = "//table//th[text()='{0}' or a/text()='{0}']/following-sibling::td/a/text()"
        return tree.xpath(template.format(field))

    # BUG FIX: was `result["tags"] = additional_tags`, which aliased the
    # module-level list so the `+=` below grew it on every call. Copy it.
    result["tags"] = list(additional_tags)
    if include_tag:
        result["tags"] += [{"name": tag_prefix + tag.strip()} for tag in parse_left("Tags ")]
    if include_parody:
        result["tags"] += [{"name": parody_prefix + tag.strip()} for tag in parse_left("From")]
    if include_blood_type:
        result["tags"] += [{"name": blood_type_prefix + tag.strip()} for tag in parse_left("Blood Type")]
    if include_race:
        result["tags"] += [{"name": race_prefix + tag.strip()} for tag in parse_left("Race")]
    if include_sign:
        result["tags"] += [{"name": sign_prefix + tag.strip()} for tag in parse_left("Sign")]
    if include_ship_class:
        result["tags"] += [{"name": ship_class_prefix + tag.strip()} for tag in parse_left("Ship Class")]
    result["country"] = next(iter(parse_left("Nationality")), "")
    birthday = parse_left("Birthday")
    birthyear = parse_left("Birthyear")
    if birthday and birthyear:
        # Birthday is e.g. "April 1"; combine it with the year field.
        birthdate = datetime.strptime(birthday[0].strip(), "%B %d").replace(year=int(birthyear[0].strip()))
        result["birthdate"] = birthdate.strftime("%Y-%m-%d")
    bust = parse_left("Bust")
    waist = parse_left("Waist")
    hip = parse_left("Hip")
    if bust and waist and hip:
        bust = bust[0].strip().replace("cm", "")
        waist = waist[0].strip().replace("cm", "")
        hip = hip[0].strip().replace("cm", "")
        result["measurements"] = "{}-{}-{}".format(bust, waist, hip)
    result["height"] = next(iter(parse_left("Height")), "").strip().replace("cm", "")

    # middle/right table, reverse result list to prefer official appearance, return result or empty string
    def parse_right(field):
        template = "//table//th[text()='{}']/following-sibling::td/text()"
        return next(reversed(tree.xpath(template.format(field))), "").strip().replace("Unknown", "")

    # should be tagged anyway if yes
    # if parse_right("Animal Ears") == "Yes":
    #     result["tags"] += [{"name": "performer:animal ears"}]
    hair_length = parse_right("Hair Length")
    if include_hair_length and hair_length:
        result["tags"] += [{"name": hair_length_prefix + hair_length}]
    apparent_age = parse_right("Apparent Age")
    if include_apparent_age and apparent_age:
        result["tags"] += [{"name": apparent_age_prefix + apparent_age}]
    result["gender"] = parse_right("Gender")
    result["eye_color"] = parse_right("Eye Color")
    result["hair_color"] = parse_right("Hair Color")

    return result
|
||||
|
||||
|
||||
# read the input
|
||||
i = readJSONInput()
|
||||
|
||||
if sys.argv[1] == "performerByURL":
|
||||
url = i["url"]
|
||||
result = performerByURL(url)
|
||||
print(json.dumps(result))
|
||||
elif sys.argv[1] == "performerByName":
|
||||
name = i["name"]
|
||||
log.info(f"Searching for name: {name}")
|
||||
results = performerByName(name)[:limit]
|
||||
results = addFranchise(name, results)
|
||||
print(json.dumps(results))
|
||||
@@ -0,0 +1,20 @@
|
||||
# Stash scraper definition: routes performer lookups to the Python script.
name: AnimeCharactersDatabase
# requires: py_common

performerByURL:
  - action: script
    url:
      - animecharactersdatabase.com/characters.php
    script:
      - python
      - AnimeCharactersDatabase.py
      - performerByURL

performerByName:
  action: script
  script:
    - python
    - AnimeCharactersDatabase.py
    - performerByName

# Last Updated January 23, 2022
|
||||
10
stash/config/scrapers/community/AnimeCharactersDatabase/manifest
Executable file
10
stash/config/scrapers/community/AnimeCharactersDatabase/manifest
Executable file
@@ -0,0 +1,10 @@
|
||||
# Package manifest generated by the scraper-manager (version/date identify the
# installed snapshot from the community repository).
id: AnimeCharactersDatabase
name: AnimeCharactersDatabase
metadata: {}
version: 4fcb313
date: "2023-12-19 22:39:58"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AnimeCharactersDatabase.yml
- AnimeCharactersDatabase.py
|
||||
Reference in New Issue
Block a user