stash
This commit is contained in:
149
stash/config/scrapers/community/Babepedia/Babepedia.yml
Normal file
149
stash/config/scrapers/community/Babepedia/Babepedia.yml
Normal file
@@ -0,0 +1,149 @@
|
||||
# Scraper identity and entry points.
name: Babepedia

# Search by performer name: the query is substituted for `{}` in queryURL and
# the resulting page is handed to the `performerSearch` XPath scraper.
performerByName:
  action: scrapeXPath
  queryURL: 'https://www.babepedia.com/search/{}'
  scraper: performerSearch

# Scrape a performer page by URL: applies to URLs matching the entries listed
# under `url`, using the `performerScraper` XPath scraper.
performerByURL:
  - action: scrapeXPath
    url:
      - 'https://www.babepedia.com'
    scraper: performerScraper
|
||||
|
||||
# XPath scraper definitions referenced by performerByName / performerByURL.
xPathScrapers:
  # Used for name searches. The selectors are unions so they cover both a
  # result list (span.results links to /babe/ pages) and a profile page
  # (div#bioarea h1 / og:url meta tag).
  performerSearch:
    performer:
      Name:
        selector: //span[@class="results"]//a[contains(@href, '/babe/')]|//div[@id="bioarea"]/h1
        postProcess:
          - replace:
              # Put a space before "aka" when it is followed by whitespace
              # and capital letters.
              - regex: "aka(\\s+[A-Z]+)"
                with: " aka $1"

      URL:
        selector: //span[@class="results"]//a[contains(@href, '/babe/')]/@href|//meta[@property='og:url']/@content
        postProcess:
          - replace:
              # Prefix the site origin unconditionally ...
              - regex: ^
                with: https://www.babepedia.com
              # ... then collapse the doubled origin that results when the
              # scraped value was already an absolute URL.
              - regex: https:\/\/www\.babepedia\.comhttps:\/\/www\.babepedia\.com
                with: https://www.babepedia.com

  # Used for scraping a performer profile page.
  performerScraper:
    common:
      # Shared selector prefix for the labelled rows in the bio area.
      # NOTE: the two trailing brackets are intentionally left open here;
      # every use below appends `[text()='<Label>']]`, which closes them.
      $label: //div[@id='bioarea']//ul/li[span[@class='label']
    performer:
      Name: //div[@id="bioarea"]/h1
      Gender:
        fixed: "Female"
      URL: //head/meta[@property="og:url"]/@content
      Twitter:
        selector: //div[@id='socialicons']//a[img[@alt='Twitter account']]/@href
        postProcess:
          - replace:
              # Strip the site prefix so only the account path remains.
              # Dots are escaped and an optional "www." / plain http is
              # accepted so every common profile URL form is stripped.
              - regex: 'https?://(?:www\.)?twitter\.com/'
                with: ""
      Instagram:
        selector: //div[@id='socialicons']//a[img[@alt='Instagram account']]/@href
        postProcess:
          - replace:
              # Same prefix stripping as for Twitter.
              - regex: 'https?://(?:www\.)?instagram\.com/'
                with: ""
      Birthdate:
        # All text nodes of the "Born:" row, joined with single spaces.
        selector: $label[text()='Born:']]//text()
        concat: " "
        postProcess:
          - replace:
              # Keep capture groups 5, 10 and 12: the first digit run, a
              # later word and the final digit run (presumably day, month
              # and year of e.g. "Born: Friday 13th of March 1987" - confirm
              # against the live page).
              - regex: (\w+:)(\s)(\w+)(\s)(\d+)(\w+)(\s)(\w+)(\s)(\w+)(\s)(\d+)
                with: $5 $10 $12
          - parseDate: 2 January 2006
      DeathDate:
        selector: $label[text()='Died:']]/text()
        postProcess:
          - replace:
              # Drop the ordinal suffix after the day number.
              - regex: (\d+)(st|nd|rd|th)
                with: "$1"
              # Replace " of " and any trailing "(age ..." with one space.
              - regex: (\sof\s)|(\(age.+)
                with: " "
          - parseDate: Monday 2 January 2006
      Ethnicity:
        # Value may be a link or plain text, so both are selected.
        selector: $label[text()='Ethnicity:']]/a/text()|$label[text()='Ethnicity:']]/text()
        postProcess:
          - map:
              Asian: asian
              Caucasian: white
              Black: black
              Latin: hispanic
      Country: $label[text()='Birthplace']]/a[contains(@href,'topbabespercountry')]/text()
      EyeColor:
        # Selects the whole row, so the label text is removed afterwards.
        selector: $label[text()='Eye color:']]
        postProcess:
          - replace:
              - regex: "Eye color: "
                with: ""
      HairColor:
        # Selects the whole row, so the label text is removed afterwards.
        selector: $label[text()='Hair color:']]
        postProcess:
          - replace:
              - regex: "Hair color: "
                with: ""
      Height:
        selector: $label[text()='Height:']]/text()
        postProcess:
          - replace:
              # Keep only the number that precedes "cm".
              - regex: ^.*\s(\d+)\scm.*$
                with: $1
      Weight:
        selector: $label[text()='Weight:']]/text()
        postProcess:
          - replace:
              # Keep only the number that precedes "kg".
              - regex: ^.*\s(\d+)\skg.*$
                with: $1
      Measurements:
        # Measurements and bra/cup size rows joined with "|", then
        # recombined as <group 1><group 5>-<group 3> of the regex below.
        selector: $label[text()='Measurements:']]/text()|$label[text()='Bra/cup size:']]/text()
        concat: "|"
        postProcess:
          - replace:
              - regex: (\d*)([a-zA-Z-]*)(\d*-\d*)(.+?)([a-zA-Z]+)(.*)
                with: $1$5-$3
      FakeTits:
        # Selects the whole row, strips the label, then maps the site's
        # wording onto "Yes"/"No" (quoted so they stay strings).
        selector: $label[text()='Boobs:']]
        postProcess:
          - replace:
              - regex: "Boobs: "
                with: ""
          - map:
              Real/Natural: "No"
              Fake/Enhanced: "Yes"
      CareerLength: $label[text()='Years active:']]/text()
      Aliases:
        selector: //div[@id='bioarea']//h2/text()
        postProcess:
          - replace:
              # Remove the "aka" marker and turn "/" separators into commas.
              - regex: aka
                with: ""
              - regex: '\/'
                with: ","
      Tattoos: $label[text()='Tattoos:']]/text()
      Piercings: $label[text()='Piercings:']]/text()
      Image:
        selector: //div[@id='profimg']//a[@class='img']/@href
        # Alternative selector will use site logo as image if none is found
        # selector: //div[@id='profimg']//a/@href
        postProcess:
          - replace:
              # A "javascript:alert...;" href is swapped for the site logo
              # path (only reachable with the alternative selector above).
              - regex: javascript\:alert.+;
                with: /images/logo.png
              # Make the (relative) image path absolute.
              - regex: ^
                with: https://www.babepedia.com
      Details:
        # Text nodes of the banner paragraphs, one per line, then tidied up.
        selector: //div[@class="babebanner separate"]/p//text()
        concat: "\n"
        postProcess:
          - replace:
              # Put a blank line before the "Performances" section.
              - regex: '\nPerformances'
                with: "\n\nPerformances"
              # Re-join lines that were split after a connecting word/comma.
              - regex: '(and|with|of|,)\n'
                with: "$1 "
              # Re-join lines that were split before punctuation.
              - regex: '\n(,|\.|\))'
                with: "$1"
# Last Updated January 05, 2022
|
||||
Reference in New Issue
Block a user