# Listing metadata: 150 lines, 4.7 KiB, YAML
# Scraper definition for Babepedia performer pages.
name: Babepedia

# Search by performer name: the query URL is fetched and parsed with the
# `performerSearch` XPath scraper. `{}` is the placeholder for the search term.
performerByName:
  action: scrapeXPath
  queryURL: https://www.babepedia.com/search/{}
  scraper: performerSearch

# Scrape a performer from a URL: any URL matching an entry in `url` is parsed
# with the `performerScraper` XPath scraper.
performerByURL:
  - action: scrapeXPath
    url:
      - https://www.babepedia.com
    scraper: performerScraper

xPathScrapers:
  # Used by performerByName. Each selector is a union of two XPath branches:
  # one for links inside <span class="results"> and one for elements that only
  # exist on a performer page (the bio <h1> / the og:url meta tag).
  performerSearch:
    performer:
      Name:
        selector: //span[@class="results"]//a[contains(@href, '/babe/')]|//div[@id="bioarea"]/h1
        postProcess:
          - replace:
              # Puts a single space in front of "aka" when it is followed by
              # whitespace and upper-case letters.
              - regex: "aka(\\s+[A-Z]+)"
                with: " aka $1"

      URL:
        selector: //span[@class="results"]//a[contains(@href, '/babe/')]/@href|//meta[@property='og:url']/@content
        postProcess:
          - replace:
              # Step 1: unconditionally prefix the site origin.
              - regex: ^
                with: https://www.babepedia.com
              # Step 2: collapse the doubled origin produced by step 1 when the
              # scraped value was already an absolute URL.
              - regex: https:\/\/www\.babepedia\.comhttps:\/\/www\.babepedia\.com
                with: https://www.babepedia.com

  # Used by performerByURL to read the fields of a performer page.
  performerScraper:
    common:
      # Shared selector prefix. The predicate bracket is deliberately left
      # open here; every use below closes it, e.g. $label[text()='Born:']]
      $label: //div[@id='bioarea']//ul/li[span[@class='label']
    performer:
      Name: //div[@id="bioarea"]/h1
      Gender:
        fixed: "Female"
      URL: //head/meta[@property="og:url"]/@content
      Twitter:
        selector: //div[@id='socialicons']//a[img[@alt='Twitter account']]/@href
        postProcess:
          - replace:
              # Strip the site prefix so only the account name remains. Dots
              # are escaped and http:// / www. variants are accepted as well.
              - regex: 'https?://(?:www\.)?twitter\.com/'
                with: ""
      Instagram:
        selector: //div[@id='socialicons']//a[img[@alt='Instagram account']]/@href
        postProcess:
          - replace:
              # Same prefix stripping as for Twitter.
              - regex: 'https?://(?:www\.)?instagram\.com/'
                with: ""
      Birthdate:
        selector: $label[text()='Born:']]//text()
        concat: " "
        postProcess:
          - replace:
              # Keeps capture groups 5, 10 and 12. Judging by the pattern and
              # the parseDate layout below these are day, month and year of a
              # "Born: <weekday> <day><suffix> of <month> <year>" string
              # (shape inferred from the regex - verify against the site).
              - regex: (\w+:)(\s)(\w+)(\s)(\d+)(\w+)(\s)(\w+)(\s)(\w+)(\s)(\d+)
                with: $5 $10 $12
          - parseDate: 2 January 2006
      DeathDate:
        selector: $label[text()='Died:']]/text()
        postProcess:
          - replace:
              # Drop the ordinal suffix after the day number.
              - regex: (\d+)(st|nd|rd|th)
                with: "$1"
              # Replace " of " and a trailing "(age ..." part with a space.
              - regex: (\sof\s)|(\(age.+)
                with: " "
          - parseDate: Monday 2 January 2006
      Ethnicity:
        selector: $label[text()='Ethnicity:']]/a/text()|$label[text()='Ethnicity:']]/text()
        postProcess:
          # Translate the site's wording into the values used downstream.
          - map:
              Asian: asian
              Caucasian: white
              Black: black
              Latin: hispanic
      # NOTE(review): unlike every other label in this file, 'Birthplace' has
      # no trailing colon - confirm against the page markup.
      Country: $label[text()='Birthplace']]/a[contains(@href,'topbabespercountry')]/text()
      EyeColor:
        selector: $label[text()='Eye color:']]
        postProcess:
          - replace:
              # The selector returns the whole <li>, so remove the label text.
              - regex: "Eye color: "
                with: ""
      HairColor:
        selector: $label[text()='Hair color:']]
        postProcess:
          - replace:
              # The selector returns the whole <li>, so remove the label text.
              - regex: "Hair color: "
                with: ""
      Height:
        selector: $label[text()='Height:']]/text()
        postProcess:
          - replace:
              # Keep only the number that precedes "cm".
              - regex: ^.*\s(\d+)\scm.*$
                with: $1
      Weight:
        selector: $label[text()='Weight:']]/text()
        postProcess:
          - replace:
              # Keep only the number that precedes "kg".
              - regex: ^.*\s(\d+)\skg.*$
                with: $1
      Measurements:
        selector: $label[text()='Measurements:']]/text()|$label[text()='Bra/cup size:']]/text()
        concat: "|"
        postProcess:
          - replace:
              # Re-assembles the "|"-joined measurement and bra/cup texts as
              # <group 1><group 5>-<group 3>; presumably bust number, cup
              # letter(s), then waist-hip (e.g. "34-24-34|34C" -> "34C-24-34";
              # example inferred from the pattern, not taken from the site).
              - regex: (\d*)([a-zA-Z-]*)(\d*-\d*)(.+?)([a-zA-Z]+)(.*)
                with: $1$5-$3
      FakeTits:
        selector: $label[text()='Boobs:']]
        postProcess:
          - replace:
              # The selector returns the whole <li>, so remove the label text.
              - regex: "Boobs: "
                with: ""
          - map:
              Real/Natural: "No"
              Fake/Enhanced: "Yes"
      CareerLength: $label[text()='Years active:']]/text()
      Aliases:
        selector: //div[@id='bioarea']//h2/text()
        postProcess:
          - replace:
              # Remove the "aka" marker, then turn "/" separators into commas.
              - regex: aka
                with: ""
              - regex: '\/'
                with: ","
      Tattoos: $label[text()='Tattoos:']]/text()
      Piercings: $label[text()='Piercings:']]/text()
      Image:
        selector: //div[@id='profimg']//a[@class='img']/@href
        # Alternative selector will use site logo as image if none is found
        # selector: //div[@id='profimg']//a/@href
        postProcess:
          - replace:
              # A javascript:alert(...) href is swapped for the site logo path
              # (presumably only reachable with the alternative selector above).
              - regex: javascript\:alert.+;
                with: /images/logo.png
              # Prefix the site origin to the scraped path.
              - regex: ^
                with: https://www.babepedia.com
      Details:
        selector: //div[@class="babebanner separate"]/p//text()
        concat: "\n"
        postProcess:
          - replace:
              # Put a blank line before the "Performances" section.
              - regex: '\nPerformances'
                with: "\n\nPerformances"
              # Re-join lines that were split after a connecting word or comma.
              - regex: '(and|with|of|,)\n'
                with: "$1 "
              # Re-join lines that were split before closing punctuation.
              - regex: '\n(,|\.|\))'
                with: "$1"
# Last Updated January 05, 2022