# Files
# compose-projects-arr/stash/config/scrapers/community/AniDB/AniDB.yml
# Christoph Califice 0a5f88d75a stash
# 2025-10-10 09:50:30 -03:00

# 234 lines
# 9.6 KiB
# YAML

# Scraper name. NOTE(review): presumably the label Stash lists this scraper under — confirm.
name: AniDB
# ~~~~~~ GETTING STARTED ~~~~~~
# Store this file as ~/stash/scrapers/AniDB.yml
# - If the scrapers directory is not there, you must create it first
#
# ~~~~~~ SETTING COOKIES ~~~~~~
# Note: I recommend creating a new account just for this scraper
# 1. Open the anidb.net website > log in > right-click > Inspect > find the cookie storage
# 2. Copy the "Value" of "adbsess" and "adbuin" and replace in the cookies category of this document
# 3. If your account is new, you need to access any NSFW anime and confirm that you want to see 18+ content
# 4. Do not change the order of the columns, as it can make it stop working
#
# ~~~~~~ SETTING USER AGENT ~~~~~~
# - Go to your Stash > Settings > Metadata Providers > Scraping > Scraper User Agent
# - Use the User Agent of your choice
# - For example: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0
#
# ~~~~~ RECOMMENDED WORKFLOW ~~~~~
# 1. Scrape scene by fragment (for performers, tags, artwork, etc)
# - If this fails, scrape by anime URL
# 2. Scrape by episode URL (for title, date)
# 3. Manually set movie scene number on scene page
# 4. Navigate to each performer's page & scrape by URL
# 5. Navigate to movie page & scrape by URL
#
# ~~~~~~ HOW TO USE (detailed) ~~~~~~
# tl;dr when in doubt, use the URL scrapers
# - For scenes: anidb.net/episode/XXX, anidb.net/anime/XXX
# - For performers: anidb.net/character/XXX
# - For movies: anidb.net/anime/XXX
#
# SCENES (by anime):
# - The Scraper by Fragment will usually work, assuming a filename like "[XX] My Lewd Anime - 01 (720p) (x264).mkv"
# - The regex strips underscores, dashes, content inside brackets and parentheses, two-digit numbers, and the file extension
# - For example, the above filename is stripped to "My Lewd Anime"
# - If this does not work, I recommend scraping with the episode URL, the anime URL, or the name scraper
# - By default, the scene scraper does not set the title, as the episode scraper serves this purpose better
# - However, if you'd like to enable this functionality, uncomment the "Title" line in sceneScraperAnime > scene
# - The scene (by anime) scraper automatically creates a new movie (i.e., series) entry,
# but unfortunately you will have to set the movie scene (i.e., episode) number manually
#
# SCENES (by episode):
# - This scraper is only accessible by scraping the episode URL (anidb.net/episode/XXX)
# - The scene episode scraper sets the episode title, the anime URL (if missing), and the original airing date
# - By default, the regex expression strips the episode number when setting the title
# - If you want to keep the episode number, delete the second regex replacement in
# sceneScraperEpisode > scene > Title > postProcess > replace
#
# MOVIES:
# - The scene (by anime) scraper automatically creates a new movie entry using the anime title and anime URL
# - On the movie page, you can scrape by URL
#
# PERFORMERS:
# - Performers need to be individually scraped by name or URL
# - I recommend creating them by scraping the anime URL, then navigating to the performer page.
# The performer URL should already be set, so you just need to press the scrape by URL button.
#
# ~~~~~ TROUBLESHOOTING ~~~~~
# - If you find that the scraper has suddenly stopped working, RESET YOUR COOKIES!
#
# ~~~~~ ANYTHING ELSE? ~~~~~
# THAT'S IT, ENJOY!
# Made by @escargotbuffed, further improvements by @symptom6186
# Performer scrape-by-URL: URLs matching the entry below are processed with
# the performerScraper XPath definition.
# NOTE(review): the pattern is broader than the anidb.net/character/XXX URLs
# recommended in the header — confirm before narrowing it.
performerByURL:
  - action: scrapeXPath
    url:
      - https://anidb.net
    scraper: performerScraper
# Performer search by name: queries the AniDB search page restricted to
# characters (entity.chartb=1) and parses the result list with performerSearch.
# "{}" in queryURL is the placeholder for the search term.
performerByName:
  action: scrapeXPath
  queryURL: https://anidb.net/search/anime/?adb.search={}&entity.chartb=1
  scraper: performerSearch
# Scene scrape by fragment: builds an AniDB anime search URL from the scene's
# file name, after cleaning the name with the replacements below, and parses
# the resulting page with sceneScraperAnime.
sceneByFragment:
  action: scrapeXPath
  queryURL: https://anidb.net/anime/?adb.search={filename}
  queryURLReplace:
    filename:
      # Remove [bracketed] and (parenthesised) groups, any two consecutive
      # digits, and everything from the first "." onward.
      # The replacement is an explicit empty string rather than a bare null.
      - regex: '\[.*?\]|\(.*?\)|\d\d|\..*'
        with: ""
      # Turn dashes and underscores into spaces.
      - regex: '\-|\_'
        with: " "
      # Percent-encode each run of whitespace for use in the query string.
      - regex: '\s+'
        with: "%20"
  scraper: sceneScraperAnime
# Scene scrape-by-URL: episode pages go to sceneScraperEpisode,
# anime pages go to sceneScraperAnime.
sceneByURL:
  - action: scrapeXPath
    url:
      - https://anidb.net/episode/
    scraper: sceneScraperEpisode
  - action: scrapeXPath
    url:
      - https://anidb.net/anime/
    scraper: sceneScraperAnime
# Scene search by name: queries the AniDB search page restricted to anime
# (entity.animetb=1) and parses the result list with sceneSearch.
# "{}" in queryURL is the placeholder for the search term.
sceneByName:
  action: scrapeXPath
  queryURL: https://anidb.net/search/anime/?adb.search={}&entity.animetb=1
  scraper: sceneSearch
# Scene scrape by query fragment: fetches the URL supplied in the fragment and
# parses it with sceneScraperAnime.
# NOTE(review): presumably this is the URL picked from a sceneByName result —
# confirm against the Stash scraper documentation.
sceneByQueryFragment:
  action: scrapeXPath
  # The quotes are required: an unquoted value starting with "{" would be
  # parsed as a YAML flow mapping.
  queryURL: "{url}"
  scraper: sceneScraperAnime
# Movie scrape-by-URL: uses the "movie" section of sceneScraperAnime.
# The header recommends anidb.net/anime/XXX URLs for movies.
movieByURL:
  - action: scrapeXPath
    url:
      - https://anidb.net/
    scraper: sceneScraperAnime
# XPath definitions referenced by the "scraper:" keys of the actions above.
# Entries under "common" are "$name" placeholders that are substituted into
# the selectors of the same scraper.
xPathScrapers:
  # Character search results: link text becomes the name; the href gets the
  # site root prepended.
  performerSearch:
    performer:
      Name: //td[@class="relid"]/a
      URL:
        selector: //td[@class="relid"]/a/@href
        postProcess:
          - replace:
              # Prepend the site root to the scraped href.
              - regex: '^'
                with: https://anidb.net
  # Character detail page.
  performerScraper:
    common:
      $info: //div[@class="g_section info"]
      $tab_1_pane: //div[@class="g_section info"]//div[@id="tab_1_pane"]
      $looks: //div[@class="g_section info"]//div[@id="tab_1_pane"]//tr[contains(@class, 'looks')]
    performer:
      Name: $tab_1_pane//tr[contains(@class, 'mainname')]//span[@itemprop="name"]
      Aliases: $tab_1_pane//tr[contains(@class, 'official')]//label[@itemprop="alternateName"]
      Disambiguation: $tab_1_pane//tr[contains(@class, 'mainname')]//a[@class='shortlink']
      Gender: $tab_1_pane//tr[contains(@class, 'gender')]//span[@itemprop="gender"]
      Ethnicity: $tab_1_pane//tr[contains(@class, 'entity')]//span[@class="tagname"]
      # Hair and eye colour are both taken from the "looks" row, told apart by
      # the tag text containing "hair" or "eyes".
      HairColor: $looks//span[contains(@class, 'tagname') and contains(text(), 'hair')]
      EyeColor: $looks//span[contains(@class, 'tagname') and contains(text(), 'eyes')]
      Height: $tab_1_pane//tr[contains(@class, 'height')]//span[@itemprop="height"]
      Weight: $tab_1_pane//tr[contains(@class, 'weight')]//span[@itemprop="weight"]
      # Measurements: Todo
      URL: //link[@rel="canonical"]/@href
      Details:
        # Join the description's text nodes with newlines.
        selector: //div[@itemprop="description"]//text()
        concat: "\n"
      Tags:
        Name: $tab_1_pane//span[@class="g_tag"]//span[@class="tagname"]
      Image: $info//div[@class="image"]//img/@src
  # Anime search results: link text becomes the title; the href gets the site
  # root prepended.
  sceneSearch:
    scene:
      Title: //td[@class="relid"]/a
      URL:
        selector: //td[@class="relid"]/a/@href
        postProcess:
          - replace:
              # Prepend the site root to the scraped href.
              - regex: '^'
                with: https://anidb.net
      Image: //td[@class="thumb anime"]//img/@src
  # Episode page: sets the title, the parent anime URL and the air date.
  sceneScraperEpisode:
    scene:
      Title:
        selector: //div[@id="layout-main"]//h1[@class="ep"]
        postProcess:
          - replace:
              # Drop up to the first nine characters of the heading.
              - regex: '^.{0,9}'
                with: ""
              # Replace the "- <number> -" part with "/"; delete this entry to
              # keep the episode number (see header).
              - regex: '\- \d+ \-'
                with: "/"
      URL:
        selector: //ul[@class="main-tabs"]//li[@class="g_odd anime"]//span/a/@href
        postProcess:
          - replace:
              # Prepend the site root to the scraped href.
              - regex: '^'
                with: https://anidb.net
      Date: //div[@id="layout-main"]//span[@itemprop="datePublished"]/@content
  # Anime page: used for scenes (by anime) and for movies.
  sceneScraperAnime:
    common:
      $info: //div[@class="g_section info"]
      $title: //div[@class="g_section info"]//div[@id="tab_1_pane"]//span[@itemprop="name"]
      $en_title: //div[@class="g_section info"]//div[@id="tab_1_pane"]//tr[contains(@class, "official verified") and contains(.//span, 'en')]//label[@itemprop="alternateName"]
      $character: //div[@id="characterlist"]//div[contains(@class, 'main character') or contains(@class, 'secondary cast')]//div[@itemprop="character"]
    scene:
      # Title and Date are disabled by default; the episode scraper sets them
      # (see header). Uncomment to enable.
      # Title: $en_title or $title
      # Date:
      #   selector: $info//div[@id="tab_1_pane"]//span[contains(@itemprop, "datePublished") or contains(@itemprop, "startDate")]
      #   postProcess:
      #     - parseDate: "02.01.2006"
      Details:
        # Join the description's text nodes with single spaces.
        selector: //div[@itemprop="description"]//text()
        concat: " "
      Tags:
        Name: $info//div[@id="tab_1_pane"]//span[@class="tagname"]
      Performers:
        Name: $character/a/span
        URL:
          selector: $character/a/@href
          postProcess:
            - replace:
                # Prepend the site root to the scraped href.
                - regex: '^'
                  with: https://anidb.net
      # The anime itself is attached as the scene's movie (series) entry.
      Movies:
        Name: $title
        URL: //link[@rel="canonical"]/@href
      Studio:
        # Creator link in the last row of the staff overview table.
        Name: $info//table[@id="staffoverview"]//tr[last()]/td[@class="name creator"]/a
      Image: $info//div[@class="image"]//img/@src
      URL: //link[@rel="canonical"]/@href
    movie:
      Name: $title
      Aliases: $en_title
      Date:
        selector: $info//div[@id="tab_1_pane"]//span[contains(@itemprop, "datePublished") or contains(@itemprop, "startDate")]
        postProcess:
          # Go reference-time layout: day.month.year. Quoted so the
          # number-like value is always read as a string.
          - parseDate: "02.01.2006"
      Synopsis:
        # Join the description's text nodes with single spaces.
        selector: //div[@itemprop="description"]//text()
        concat: " "
      Studio:
        # Creator link in the last row of the staff overview table.
        Name: $info//table[@id="staffoverview"]//tr[last()]/td[@class="name creator"]/a
      FrontImage: $info//div[@class="image"]//img/@src
      URL: //link[@rel="canonical"]/@href
# Session cookies sent with requests to anidb.net.
# Do not commit real cookie values to version control.
driver:
  cookies:
    - CookieURL: "https://anidb.net/"
      Cookies:
        # Accessing adult content requires an AniDB account.
        # Replace the empty Value fields below with your own cookie values.
        - Name: "adbsess"
          Domain: "anidb.net"
          Value: ""  # Enter the value of the 'adbsess' cookie here
          Path: "/"
        - Name: "adbuin"
          Domain: "anidb.net"
          Value: ""  # Enter the value of the 'adbuin' cookie here
          Path: "/"
# Last Updated Dec 20, 2023