Files
compose-projects-arr/stash/config/scrapers/community/IMDB/IMDB.yml
Christoph Califice 0a5f88d75a stash
2025-10-10 09:50:30 -03:00

146 lines
4.4 KiB
YAML

# Name of this scraper definition.
name: IMDB

# Performer lookup by name: the performerSearch XPath scraper is run against
# the IMDb name-search URL below.
# NOTE(review): `{}` is presumably the placeholder for the search text;
# confirm against the Stash scraper documentation.
performerByName:
  action: scrapeXPath
  scraper: performerSearch
  queryURL: 'https://www.imdb.com/search/name-text/?bio={}'
# Performer scraping from a URL: a single entry that sends "imdb.com"
# URLs to the performerScraper XPath scraper.
# NOTE(review): `url` entries are presumably matched as substrings; confirm.
performerByURL:
  - action: scrapeXPath
    scraper: performerScraper
    url:
      - imdb.com
# Scene lookup by name: the sceneSearch XPath scraper is run against the
# IMDb "find" URL below.
# NOTE(review): `{}` is presumably the placeholder for the search text;
# confirm against the Stash scraper documentation.
sceneByName:
  action: scrapeXPath
  scraper: sceneSearch
  queryURL: 'https://www.imdb.com/find?q={}'
# Scene scraping from a query fragment: the sceneScraper XPath scraper is run
# against the URL built from the `{url}` template.
# NOTE(review): `{url}` is presumably filled from the fragment's URL field;
# confirm against the Stash scraper documentation.
sceneByQueryFragment:
  action: scrapeXPath
  scraper: sceneScraper
  queryURL: "{url}"
# Scene scraping from a URL: a single entry that sends "imdb.com/title/"
# URLs to the sceneScraper XPath scraper.
sceneByURL:
  - action: scrapeXPath
    scraper: sceneScraper
    url:
      - imdb.com/title/
# Movie scraping from a URL: a single entry that sends "imdb.com/title/"
# URLs to the movieScraper XPath scraper (same URL pattern as sceneByURL).
movieByURL:
  - action: scrapeXPath
    scraper: movieScraper
    url:
      - imdb.com/title/
# XPath scraper definitions referenced by the `scraper:` keys of the action
# entries. Several values are shared through YAML anchors (&imageAttr, &title,
# &url, &date, &desc, &studio); each anchor is defined on first use and reused
# through an alias further down, so the anchored entry must stay above every
# alias that refers to it.
xPathScrapers:
  # Result list of the performer name search.
  performerSearch:
    common:
      # One <a> element per search result.
      $listAnchor: //div[@class="lister-list"]/div[@class="lister-item mode-detail"]/div[@class="lister-item-content"]/h3/a
    performer:
      Name:
        selector: $listAnchor/text()
      URL:
        selector: $listAnchor/@href
        postProcess:
          # Prefix the scraped href with the site origin.
          - replace:
              - regex: '^'
                with: https://www.imdb.com

  # Single performer page.
  performerScraper:
    performer:
      Name: //td[@id="overview-top" or @class="name-overview-widget__section"]//h1/span[1]/text()
      Birthdate:
        selector: //time/@datetime
        postProcess:
          # Quoted: the bare layout has the shape of a YAML 1.1 timestamp.
          - parseDate: "2006-1-2"
      Image: &imageAttr
        selector: //meta[@property="og:image"]/@content
        postProcess:
          # Remove any match of ".../imdb*.png" from the value.
          # NOTE(review): presumably IMDb's generic placeholder image; confirm.
          - replace:
              - regex: '.*/imdb[^/]*\.png'
                with: ""
      Details:
        # Disabled alternative selector:
        # selector: //div[@class="name-trivia-bio-text"]/div/text()[1]
        selector: //span[@class='see-more inline nobr-only']/a/@href
        postProcess:
          # NOTE(review): the prefix ends with "/"; if the scraped href is
          # root-relative the result contains "//" after the host. Confirm.
          - replace:
              - regex: '^'
                with: "https://www.imdb.com/"
          # NOTE(review): presumably loads the rewritten link and reads the
          # text from that page; confirm against the Stash scraper docs.
          - subScraper:
              selector: //div[@class="soda odd"]/p/text()
      URL: //div[@id='details-official-sites']/a[contains(text(),'Official Site')]/@href
      # Facebook: //div[@id='details-official-sites']/a[contains(text(),'Facebook')]/@href
      Instagram: //div[@id='details-official-sites']/a[contains(@href,"instagram.com/")]/@href
      Aliases:
        selector: //div[@id="details-akas"]/h4/following-sibling::text()
        # Join all matches with "|", then turn every run of "|" into ", ".
        concat: "|"
        postProcess:
          - replace:
              - regex: '\|+'
                with: ", "
      Height:
        selector: //div[@id="details-height"]/h4/following-sibling::text()
        postProcess:
          # Keep the text in front of the parenthesised part, then convert it
          # with feetToCm.
          - replace:
              - regex: '(.+)\s*\(.+$'
                with: '$1'
          - feetToCm: true

  # "Titles" table of the find page.
  sceneSearch:
    common:
      # One table row per title result.
      $scenerow: //h3[text() = "Titles"]/following-sibling::table//tr[contains(@class, 'findResult')]
    scene:
      Title: $scenerow//td[@class='result_text']
      URL:
        selector: $scenerow//td[@class='result_text']/a/@href
        postProcess:
          # Prefix the scraped href with the site origin (no "www." here,
          # unlike performerSearch).
          - replace:
              - regex: '^'
                with: https://imdb.com
      Image: $scenerow//td[@class='primary_photo']//img/@src

  # Title page scraped as a scene. Defines the anchors reused by movieScraper.
  sceneScraper:
    scene:
      Title: &title //section//h1
      URL: &url //meta[@property="og:url"]/@content
      # The same title and URL are also reported as the scene's movie.
      Movies:
        Name: *title
        URL: *url
      Date: &date
        selector: //li[@data-testid='title-details-releasedate']/div/ul/li/a/text()
        postProcess:
          # Drop everything from the first "(" on, then expand a bare
          # four-digit year to <year>-01-01.
          - replace:
              - regex: '\s*\(.+$'
                with: ""
              - regex: '^(\d\d\d\d)$'
                with: '$1-01-01'
          # NOTE(review): an expanded bare year no longer matches this layout;
          # this relies on unparseable values being passed through. Confirm.
          - parseDate: 'January 2, 2006'
      Details: &desc //span[@data-testid="plot-xl"]
      Tags:
        Name: //div[@data-testid="genres"]/a/span
      Performers:
        Name: //a[@data-testid="title-cast-item__actor"]
      # Alias of the og:image rule anchored in performerScraper.
      Image: *imageAttr
      Studio: &studio
        # First company link only.
        Name: (//li[@data-testid="title-details-companies"]/div//a)[1]

  # Title page scraped as a movie. Reuses the sceneScraper anchors.
  movieScraper:
    movie:
      Name: *title
      URL: *url
      Date: *date
      Director: //section[@data-testid="title-cast"]//li[span[text()="Director"]]//a
      Duration:
        selector: //ul[@data-testid="hero-title-block__metadata"]/li[last()]
        postProcess:
          # Rewrites "<H>h <M>m" text to H:MM:SS form. Assuming the rules run
          # in order: "1h 30m" -> "1:30:00", "2h" -> "2:00:00",
          # "45m" -> "00:45:00".
          - replace:
              - regex: '(\d+)h\s*'
                with: "$1:"
              - regex: '(\d+)m'
                with: "$1:00"
              - regex: ':$'  # only h
                with: ":00:00"
              - regex: '^(\d+:\d+)$'  # only m
                with: "00:$1"
      Studio: *studio
      Synopsis: *desc
      FrontImage: *imageAttr
# Request settings: a single User-Agent header identifying as desktop Firefox.
driver:
  headers:
    - Key: User-Agent
      # The previous value ended with a stray ")" after "Firefox/79.0", which
      # left an unbalanced parenthesis in the header; it is removed here.
      Value: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0'
# Last Updated August 13, 2022