Files
compose-projects-arr/stash/config/scrapers/community/Baberotica/Baberotica.yml
Christoph Califice 0a5f88d75a stash
2025-10-10 09:50:30 -03:00

209 lines
8.2 KiB
YAML

# Scraper definition for the Baberotica family of sites.
name: Baberotica

# Scene URLs on any of the listed domains are handled by the XPath scraper
# named `sceneScraper`. The domain list is anchored as `urls` so that other
# URL matchers in this file can reuse it through the `*urls` alias.
sceneByURL:
  - action: scrapeXPath
    url: &urls
      - avidolz.com
      - baberotica.com
      - baberoticavr.com
      - nucosplay.com
      - suckmevr.com
      - teenthais.com
      - vrpornpass.com
    scraper: sceneScraper
# Performer URLs are matched against the same domain list as scenes (the
# `urls` anchor) and handled by the XPath scraper named `performerScraper`.
performerByURL:
  - action: scrapeXPath
    url: *urls
    scraper: performerScraper
# XPath scraper definitions referenced by `sceneByURL` / `performerByURL`.
# Selectors are unions (`|`) of alternatives; the different page layouts of
# the supported sites are each covered by one alternative.
xPathScrapers:
  sceneScraper:
    scene:
      Title:
        selector: //h5/a | //h1[@itemprop="name"] | //div[@class="rel"]/img/@alt
      Performers:
        Name: //p/span[@itemprop="actor"]/a/span | //div[@class="cat"][1]/a/text()
        URL: //div[@class="cat"][1]/a/@href | //span[@itemprop="actor"]/a/@href
      Date:
        # vrpornpass.com does not currently provide the scene published date on the non-paid site
        selector: //div[contains(@class, "pure-u-1")]//meta[@itemprop="datePublished"]/@content | //div[@class="video-date"]
        postProcess:
          # Normalise the raw text before parsing: keep only what follows
          # "Added:", drop everything from the first "T" onwards (the time part
          # of an ISO timestamp), and strip ordinal suffixes (1st, 2nd, 3rd, 4th).
          - replace:
              - regex: .*Added:\s*(.*)\s*$
                with: $1
              - regex: "T.+$"
                with: ""
              - regex: (\d+)(st|nd|rd|th)
                with: "$1"
          # Each layout is tried in turn; a layout that does not match leaves
          # the value unchanged for the next one.
          - parseDate: 2006-01-02
          # The day token is `2`, not `02`: the zero-padded form rejects
          # single-digit days such as "January 2, 2023", while `2` accepts both
          # one- and two-digit days.
          - parseDate: January 2, 2006
          - parseDate: 2006-01-02T15:04:05-07:00
      Tags:
        Name: //a[@itemprop="genre"] | //div[@class="cat"][2]/a
      Details:
        selector: //div[@itemprop="description"] | //p[span[@class="readmore"]]
        postProcess:
          # Remove the trailing "... Read More" teaser text.
          - replace:
              - regex: ... Read More
                with: ""
      Image:
        selector: //div[contains(@class,"pure-u-1")]/meta[@itemprop="thumbnailUrl"]/@content | //div[@id="videohtml5tour"]//img[@class="pure-img"]/@src | //div[@id="videohtml5tour"]/video/@poster | //div[@class="rel"]//img/@src
        postProcess:
          # Turn protocol-relative URLs ("//host/...") into https URLs.
          - replace:
              - regex: ^//
                with: https://
      URL:
        selector: //link[@rel="canonical"][1]/@href | //link[@rel="alternate"][1]/@href | //div[@class="title"]/h5/a/@href
        postProcess:
          # Drop a trailing "feed/" segment (and surrounding whitespace) so a
          # feed link yields the page URL.
          - replace:
              - regex: \s*(.*)\s*feed/$
                with: $1
      Studio:
        Name:
          selector: //meta[@itemprop="url"]/@content
          postProcess:
            # Translate the site root URL into the studio display name.
            - map:
                https://avidolz.com/: AvIdolz
                https://baberotica.com/: Baberotica
                https://baberoticavr.com/: BaberoticaVR
                https://nucosplay.com/: Nu Cosplay
                https://suckmevr.com/: SuckMeVR
                https://teenthais.com/: TeenThais
                https://vrpornpass.com/: VR PornPass
  performerScraper:
    # Shared XPath fragments for the profile attribute containers.
    # NOTE(review): the suffixes presumably stand for the site layouts they
    # target (BE = Baberotica, TT = TeenThais, PP = VR PornPass) - confirm.
    common:
      $profileBE: //div[@class="model-profile"]
      $profileTT: //div[@class="pure-u-1-4 pure-u-sm-1-2 pure-u-xs-1"]
      $profilePP: //div[@class="pure-u-1-4 pure-u-sm-1-2 pure-u-xs-1-3"]
    # Most attribute fields below follow one pattern: select the labelled
    # profile entry, strip the "<Label>:" prefix with a capture group, then
    # blank out placeholder values such as "None".
    performer:
      Name:
        selector: $profileBE[contains(strong, "Name:")]//text() | //meta[@itemprop="title"]/@content | //div[@class="model-info"]//h1[@itemprop="name"]/text()
        postProcess:
          - replace:
              - regex: .*Name:\s*(.*)\s*$
                with: $1
              - regex: None
                with: ""
      Aliases:
        selector: $profileBE[contains(strong, "Alias name:")]//text()
        postProcess:
          - replace:
              - regex: .*Alias name:\s*(.*)\s*$
                with: $1
              - regex: None
                with: ""
      Gender:
        # These sites only have profiles for female performers, so using fixed: Female
        fixed: Female
      Birthdate:
        selector: $profileBE[contains(strong, "Birth date:")]//text() | $profilePP[contains(strong, "Birth date:")]/following-sibling::*[1]/text()
        postProcess:
          - replace:
              - regex: .*Birth date:\s*(.*)\s*$
                with: $1
          - parseDate: January 2, 2006
          - parseDate: 2006-01-02
      Country:
        # The $profileTT alternative reads the "Ethnicity:" entry; the
        # replacements below turn "Thai" into "Thailand" and the
        # "Japanese idol" meta content into "Japan".
        selector: $profileBE[contains(strong, "Country:")]//text() | $profileTT[contains(strong, "Ethnicity:")]/following-sibling::*[1]/text() | $profilePP[contains(strong, "Country:")]/following-sibling::*[1]/text() | //div[@class="video-info"]/meta[@content="Japanese idol"]/@content
        postProcess:
          - replace:
              - regex: .*Country:\s*(.*)\s*$
                with: $1
              - regex: "None|Other Country"
                with: ""
              - regex: ^Thai$
                with: Thailand
              - regex: ^Japanese idol$
                with: Japan
      Ethnicity:
        selector: $profileBE[contains(strong, "Ethnicity:")]//text() | $profilePP[contains(strong, "Ethnicity:")]/following-sibling::*[1]/text() | $profileTT[contains(strong, "Ethnicity:")]/following-sibling::*[1]/text() | //div[@class="video-info"]/meta[@content="Japanese idol"]/@content
        postProcess:
          - replace:
              - regex: .*Ethnicity:\s*(.*)\s*$
                with: $1
              - regex: None
                with: ""
              - regex: ^Thai$
                with: Asian
              - regex: ^Japanese idol$
                with: Asian
      Weight:
        selector: $profileBE[contains(strong, "Weight:")]//text()
        postProcess:
          # Keep only the number in front of the "kg" unit.
          - replace:
              - regex: .*Weight:\s*(.*)\s*$
                with: $1
              - regex: (\d+)kg
                with: $1
              - regex: None
                with: ""
      Height:
        selector: $profileBE[contains(strong, "Height:")]//text() | $profileTT[contains(strong, "Height:")]/following-sibling::*[1]/text()
        postProcess:
          # A bare "cm" (unit without a number) becomes empty; otherwise keep
          # the number in front of "cm" and drop anything after it.
          - replace:
              - regex: .*Height:\s*(.*)\s*$
                with: $1
              - regex: ^cm$
                with: ""
              - regex: (\d+)cm.*
                with: $1
              - regex: None
                with: ""
      HairColor:
        selector: $profileBE[contains(strong, "Hair color:")]//text() | $profileTT[contains(strong, "Hair color:")]/following-sibling::*[1]/text() | $profilePP[contains(strong, "Hair color:")]/following-sibling::*[1]/text()
        postProcess:
          - replace:
              - regex: (?i).*hair color:?\s*(.*)\s*$
                with: $1
              - regex: None|N/A
                with: ""
      EyeColor:
        selector: $profileBE[contains(strong, "Eye color:")]//text() | $profileTT[contains(strong, "Eye color:")]/following-sibling::*[1]/text() | $profilePP[contains(strong, "Eye Color:")]/following-sibling::*[1]/text()
        postProcess:
          - replace:
              - regex: (?i).*eye color:?\s*(.*)\s*$
                with: $1
              - regex: None|N/A
                with: ""
      Piercings:
        selector: $profileBE[contains(strong, "Piercings:")]//text() | $profilePP[contains(strong, "Piercings:")]/following-sibling::*[1]/text()
        postProcess:
          - replace:
              - regex: .*Piercings:\s*(.*)\s*$
                with: $1
              - regex: "None|No Piercings|^No$"
                with: ""
      Tattoos:
        selector: $profileBE[contains(strong, "Tattoo:")]//text() | $profilePP[contains(strong, "Tattoos:")]/following-sibling::*[1]/text()
        postProcess:
          - replace:
              - regex: .*Tattoo:\s*(.*)\s*$
                with: $1
              - regex: "None$|^No$|(?i)^No tattoo$"
                with: ""
      Instagram:
        selector: $profileBE[contains(strong, "Instagram:")]/a/@href
      Twitter:
        selector: $profileBE[contains(strong, "Twitter:")]/a/@href
      Details:
        selector: //p[@itemprop="description"]//text() | //p[span[@class="readmore"]]
        postProcess:
          # Remove the trailing "... Read More" teaser text.
          - replace:
              - regex: ... Read More
                with: ""
      URL:
        selector: //link[@rel="canonical"][1]/@href | //link[@rel="alternate"][1]/@href | //div[@class="model-info"]/a/@href
        postProcess:
          # Drop a trailing "feed/" segment (and surrounding whitespace) so a
          # feed link yields the page URL.
          - replace:
              - regex: \s*(.*)\s*feed/$
                with: $1
      Image:
        selector: //div[@class="model-photo"]/img[@class="rounded"]/@src | //div[@class="pure-u-1-4 pure-u-sm-1-3 pure-u-xs-1-2"]//img/@src | //div[@class="pure-u-1-4 pure-u-sm-1-3 pure-u-xs-1"]//img/@src
        postProcess:
          # Make protocol-relative URLs absolute, then swap the size token in
          # the image URL (460x640 -> 690x960, 270x480 -> 405x720).
          # NOTE(review): presumably this requests a larger rendition of the
          # same image - confirm the sites still serve these sizes.
          - replace:
              - regex: ^//
                with: https://
              - regex: 460x640
                with: 690x960
              - regex: 270x480
                with: 405x720
# Last Updated October 10, 2023