# XPath scraper definition for the Baberotica family of sites.
# Layout: URL matchers (sceneByURL / performerByURL) point at the named
# scrapers declared under xPathScrapers.
# NOTE(review): schema looks like the Stash scraper format - confirm against the consumer.
name: Baberotica

# Scene pages on any of the hosts below are handled by sceneScraper.
# The host list is anchored as &urls so performerByURL can reuse it.
sceneByURL:
  - action: scrapeXPath
    url: &urls
      - avidolz.com
      - baberotica.com
      - baberoticavr.com
      - nucosplay.com
      - suckmevr.com
      - teenthais.com
      - vrpornpass.com
    scraper: sceneScraper

# Performer pages on the same hosts (alias *urls) are handled by performerScraper.
performerByURL:
  - action: scrapeXPath
    url: *urls
    scraper: performerScraper

xPathScrapers:
  # Each selector is a union (|) of alternatives; presumably one alternative per
  # site layout - verify against the live pages.
  sceneScraper:
    scene:
      Title:
        selector: //h5/a | //h1[@itemprop="name"] | //div[@class="rel"]/img/@alt
      Performers:
        Name: //p/span[@itemprop="actor"]/a/span | //div[@class="cat"][1]/a/text()
        URL: //div[@class="cat"][1]/a/@href | //span[@itemprop="actor"]/a/@href
      Date:
        # vrpornpass.com does not currently provide the scene published date on the non-paid site
        selector: //div[contains(@class, "pure-u-1")]//meta[@itemprop="datePublished"]/@content | //div[@class="video-date"]
        postProcess:
          - replace:
              # Keep only what follows the "Added:" label.
              - regex: .*Added:\s*(.*)\s*$
                with: $1
              # Drop everything from the first capital "T" onward
              # (e.g. the time portion of an ISO timestamp).
              - regex: "T.+$"
                with: ""
              # Remove ordinal suffixes: 1st -> 1, 22nd -> 22, ...
              - regex: (\d+)(st|nd|rd|th)
                with: "$1"
          # Several layouts are listed one after another; presumably a layout that
          # does not match leaves the value untouched for the next one - TODO confirm.
          - parseDate: 2006-01-02
          - parseDate: January 02, 2006
          # NOTE(review): the replace step above already removes everything from the
          # first "T", so this layout can presumably never match - confirm before removing.
          - parseDate: 2006-01-02T15:04:05-07:00
      Tags:
        Name: //a[@itemprop="genre"] | //div[@class="cat"][2]/a
      Details:
        selector: //div[@itemprop="description"] | //p[span[@class="readmore"]]
        postProcess:
          - replace:
              # Strip the truncation marker. NOTE(review): the dots are unescaped,
              # so they match any three characters, not only literal periods.
              - regex: ... Read More
                with: ""
      Image:
        selector: //div[contains(@class,"pure-u-1")]/meta[@itemprop="thumbnailUrl"]/@content | //div[@id="videohtml5tour"]//img[@class="pure-img"]/@src | //div[@id="videohtml5tour"]/video/@poster | //div[@class="rel"]//img/@src
        postProcess:
          - replace:
              # Turn protocol-relative URLs (//host/path) into https URLs.
              - regex: ^//
                with: https://
      URL:
        selector: //link[@rel="canonical"][1]/@href | //link[@rel="alternate"][1]/@href | //div[@class="title"]/h5/a/@href
        postProcess:
          - replace:
              # Remove a trailing "feed/" segment (presumably from the rel="alternate" link - verify).
              - regex: \s*(.*)\s*feed/$
                with: $1
      Studio:
        Name:
          selector: //meta[@itemprop="url"]/@content
          postProcess:
            # Translate the site root URL into the studio display name.
            - map:
                https://avidolz.com/: AvIdolz
                https://baberotica.com/: Baberotica
                https://baberoticavr.com/: BaberoticaVR
                https://nucosplay.com/: Nu Cosplay
                https://suckmevr.com/: SuckMeVR
                https://teenthais.com/: TeenThais
                https://vrpornpass.com/: VR PornPass

  performerScraper:
    # Shared XPath fragments referenced by the selectors below.
    # $profileBE selectors read the whole node text (label included), so their
    # values are cleaned with a ".*Label:" regex; $profileTT / $profilePP selectors
    # read the text of the element following the label container.
    common:
      $profileBE: //div[@class="model-profile"]
      $profileTT: //div[@class="pure-u-1-4 pure-u-sm-1-2 pure-u-xs-1"]
      $profilePP: //div[@class="pure-u-1-4 pure-u-sm-1-2 pure-u-xs-1-3"]
    performer:
      Name:
        selector: $profileBE[contains(strong, "Name:")]//text() | //meta[@itemprop="title"]/@content | //div[@class="model-info"]//h1[@itemprop="name"]/text()
        postProcess:
          - replace:
              - regex: .*Name:\s*(.*)\s*$
                with: $1
              # "None" is treated as a placeholder and blanked out.
              - regex: None
                with: ""
      Aliases:
        selector: $profileBE[contains(strong, "Alias name:")]//text()
        postProcess:
          - replace:
              - regex: .*Alias name:\s*(.*)\s*$
                with: $1
              - regex: None
                with: ""
      Gender:
        # These sites only have profiles for female performers, so using fixed: Female
        fixed: Female
      Birthdate:
        selector: $profileBE[contains(strong, "Birth date:")]//text() | $profilePP[contains(strong, "Birth date:")]/following-sibling::*[1]/text()
        postProcess:
          - replace:
              - regex: .*Birth date:\s*(.*)\s*$
                with: $1
          - parseDate: January 2, 2006
          - parseDate: 2006-01-02
      Country:
        # NOTE(review): the $profileTT alternative reads the "Ethnicity:" field; together
        # with the ^Thai$ -> Thailand rule below this looks intentional - confirm.
        selector: $profileBE[contains(strong, "Country:")]//text() | $profileTT[contains(strong, "Ethnicity:")]/following-sibling::*[1]/text() | $profilePP[contains(strong, "Country:")]/following-sibling::*[1]/text() | //div[@class="video-info"]/meta[@content="Japanese idol"]/@content
        postProcess:
          - replace:
              - regex: .*Country:\s*(.*)\s*$
                with: $1
              - regex: "None|Other Country"
                with: ""
              - regex: ^Thai$
                with: Thailand
              - regex: ^Japanese idol$
                with: Japan
      Ethnicity:
        selector: $profileBE[contains(strong, "Ethnicity:")]//text() | $profilePP[contains(strong, "Ethnicity:")]/following-sibling::*[1]/text() | $profileTT[contains(strong, "Ethnicity:")]/following-sibling::*[1]/text() | //div[@class="video-info"]/meta[@content="Japanese idol"]/@content
        postProcess:
          - replace:
              - regex: .*Ethnicity:\s*(.*)\s*$
                with: $1
              - regex: None
                with: ""
              - regex: ^Thai$
                with: Asian
              - regex: ^Japanese idol$
                with: Asian
      Weight:
        selector: $profileBE[contains(strong, "Weight:")]//text()
        postProcess:
          - replace:
              - regex: .*Weight:\s*(.*)\s*$
                with: $1
              # Keep the number, drop the "kg" unit.
              - regex: (\d+)kg
                with: $1
              - regex: None
                with: ""
      Height:
        selector: $profileBE[contains(strong, "Height:")]//text() | $profileTT[contains(strong, "Height:")]/following-sibling::*[1]/text()
        postProcess:
          - replace:
              - regex: .*Height:\s*(.*)\s*$
                with: $1
              # A bare unit with no number means the height is missing.
              - regex: ^cm$
                with: ""
              # Keep the centimetre figure and discard anything after it.
              - regex: (\d+)cm.*
                with: $1
              - regex: None
                with: ""
      HairColor:
        selector: $profileBE[contains(strong, "Hair color:")]//text() | $profileTT[contains(strong, "Hair color:")]/following-sibling::*[1]/text() | $profilePP[contains(strong, "Hair color:")]/following-sibling::*[1]/text()
        postProcess:
          - replace:
              # Case-insensitive label strip; the colon is optional.
              - regex: (?i).*hair color:?\s*(.*)\s*$
                with: $1
              - regex: None|N/A
                with: ""
      EyeColor:
        # NOTE(review): the $profilePP alternative matches "Eye Color:" (capital C),
        # unlike the other two - presumably mirrors that site's markup; verify.
        selector: $profileBE[contains(strong, "Eye color:")]//text() | $profileTT[contains(strong, "Eye color:")]/following-sibling::*[1]/text() | $profilePP[contains(strong, "Eye Color:")]/following-sibling::*[1]/text()
        postProcess:
          - replace:
              - regex: (?i).*eye color:?\s*(.*)\s*$
                with: $1
              - regex: None|N/A
                with: ""
      Piercings:
        selector: $profileBE[contains(strong, "Piercings:")]//text() | $profilePP[contains(strong, "Piercings:")]/following-sibling::*[1]/text()
        postProcess:
          - replace:
              - regex: .*Piercings:\s*(.*)\s*$
                with: $1
              - regex: "None|No Piercings|^No$"
                with: ""
      Tattoos:
        # NOTE(review): $profileBE uses the label "Tattoo:" while $profilePP uses
        # "Tattoos:"; the clean-up regex below only strips "Tattoo:" - confirm.
        selector: $profileBE[contains(strong, "Tattoo:")]//text() | $profilePP[contains(strong, "Tattoos:")]/following-sibling::*[1]/text()
        postProcess:
          - replace:
              - regex: .*Tattoo:\s*(.*)\s*$
                with: $1
              - regex: "None$|^No$|(?i)^No tattoo$"
                with: ""
      Instagram:
        selector: $profileBE[contains(strong, "Instagram:")]/a/@href
      Twitter:
        selector: $profileBE[contains(strong, "Twitter:")]/a/@href
      Details:
        selector: //p[@itemprop="description"]//text() | //p[span[@class="readmore"]]
        postProcess:
          - replace:
              # Strip the truncation marker. NOTE(review): the dots are unescaped,
              # so they match any three characters, not only literal periods.
              - regex: ... Read More
                with: ""
      URL:
        selector: //link[@rel="canonical"][1]/@href | //link[@rel="alternate"][1]/@href | //div[@class="model-info"]/a/@href
        postProcess:
          - replace:
              # Remove a trailing "feed/" segment (presumably from the rel="alternate" link - verify).
              - regex: \s*(.*)\s*feed/$
                with: $1
      Image:
        selector: //div[@class="model-photo"]/img[@class="rounded"]/@src | //div[@class="pure-u-1-4 pure-u-sm-1-3 pure-u-xs-1-2"]//img/@src | //div[@class="pure-u-1-4 pure-u-sm-1-3 pure-u-xs-1"]//img/@src
        postProcess:
          - replace:
              # Turn protocol-relative URLs (//host/path) into https URLs.
              - regex: ^//
                with: https://
              # Rewrite the size token in the image URL
              # (presumably selects a larger rendition - verify the target sizes exist).
              - regex: 460x640
                with: 690x960
              - regex: 270x480
                with: 405x720
# Last Updated October 10, 2023