This commit is contained in:
Christoph Califice
2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions

View File

@@ -0,0 +1,149 @@
# XPath scraper definition for Babepedia performer pages.
name: Babepedia

# Performer search by name; `{}` in queryURL is the placeholder for the
# search term. Results are parsed with the `performerSearch` scraper below.
performerByName:
  action: scrapeXPath
  queryURL: https://www.babepedia.com/search/{}
  scraper: performerSearch

# Performer scrape by URL for any address containing the listed prefix.
performerByURL:
  - action: scrapeXPath
    url:
      - https://www.babepedia.com
    scraper: performerScraper

xPathScrapers:
  # Parses the search page into (Name, URL) candidates.
  performerSearch:
    performer:
      Name:
        # Union of two sources: the result links, or the <h1> inside
        # #bioarea (presumably for searches that land directly on a
        # profile page - confirm against the site).
        selector: //span[@class="results"]//a[contains(@href, '/babe/')]|//div[@id="bioarea"]/h1
        postProcess:
          - replace:
              # Put a space in front of "aka" where it is glued to the name.
              - regex: "aka(\\s+[A-Z]+)"
                with: " aka $1"
      URL:
        selector: //span[@class="results"]//a[contains(@href, '/babe/')]/@href|//meta[@property='og:url']/@content
        postProcess:
          - replace:
              # Prefix every match with the site root ...
              - regex: ^
                with: https://www.babepedia.com
              # ... then collapse the doubled prefix that appears when the
              # match was already an absolute URL (the og:url branch).
              - regex: https:\/\/www\.babepedia\.comhttps:\/\/www\.babepedia\.com
                with: https://www.babepedia.com

  # Parses a single performer profile page.
  performerScraper:
    common:
      # Shared XPath prefix for the labelled bio list items. The `li[`
      # predicate is deliberately left open: each use below appends
      # `[text()='<Label>']]`, which supplies the closing bracket.
      $label: //div[@id='bioarea']//ul/li[span[@class='label']
    performer:
      Name: //div[@id="bioarea"]/h1
      Gender:
        fixed: "Female"
      URL: //head/meta[@property="og:url"]/@content
      Twitter:
        selector: //div[@id='socialicons']//a[img[@alt='Twitter account']]/@href
        postProcess:
          - replace:
              # Drop the URL prefix so only the remainder of the link is kept.
              - regex: https://twitter.com/
                with: ""
      Instagram:
        selector: //div[@id='socialicons']//a[img[@alt='Instagram account']]/@href
        postProcess:
          - replace:
              # Drop the URL prefix so only the remainder of the link is kept.
              - regex: https://instagram.com/
                with: ""
      Birthdate:
        selector: $label[text()='Born:']]//text()
        concat: " "
        postProcess:
          - replace:
              # Keeps capture groups 5, 10 and 12 - presumably day, month
              # and year of text shaped like
              # "Born: Sunday 1st of January 2000" (confirm against site).
              - regex: (\w+:)(\s)(\w+)(\s)(\d+)(\w+)(\s)(\w+)(\s)(\w+)(\s)(\d+)
                with: $5 $10 $12
          - parseDate: 2 January 2006
      DeathDate:
        selector: $label[text()='Died:']]/text()
        postProcess:
          - replace:
              # Strip the ordinal suffix from the day number.
              - regex: (\d+)(st|nd|rd|th)
                with: "$1"
              # Replace " of " and a trailing "(age ..." note with a space.
              - regex: (\sof\s)|(\(age.+)
                with: " "
          - parseDate: Monday 2 January 2006
      Ethnicity:
        selector: $label[text()='Ethnicity:']]/a/text()|$label[text()='Ethnicity:']]/text()
        postProcess:
          # Translate the site's wording into the values listed on the right.
          - map:
              Asian: asian
              Caucasian: white
              Black: black
              Latin: hispanic
      # NOTE(review): unlike every other label here, 'Birthplace' is matched
      # without a trailing colon - confirm this matches the site's markup.
      Country: $label[text()='Birthplace']]/a[contains(@href,'topbabespercountry')]/text()
      EyeColor:
        # The whole <li> is selected (no /text()), so the label text is
        # part of the match and is removed below.
        selector: $label[text()='Eye color:']]
        postProcess:
          - replace:
              - regex: "Eye color: "
                with: ""
      HairColor:
        # The whole <li> is selected (no /text()), so the label text is
        # part of the match and is removed below.
        selector: $label[text()='Hair color:']]
        postProcess:
          - replace:
              - regex: "Hair color: "
                with: ""
      Height:
        selector: $label[text()='Height:']]/text()
        postProcess:
          - replace:
              # Keep only the number that precedes "cm".
              - regex: ^.*\s(\d+)\scm.*$
                with: $1
      Weight:
        selector: $label[text()='Weight:']]/text()
        postProcess:
          - replace:
              # Keep only the number that precedes "kg".
              - regex: ^.*\s(\d+)\skg.*$
                with: $1
      Measurements:
        # Both fields are joined with "|" and then recombined by the regex:
        # group 1 + group 5 + "-" + group 3 (presumably band size, cup
        # letters and the remaining two measurements - confirm).
        selector: $label[text()='Measurements:']]/text()|$label[text()='Bra/cup size:']]/text()
        concat: "|"
        postProcess:
          - replace:
              - regex: (\d*)([a-zA-Z-]*)(\d*-\d*)(.+?)([a-zA-Z]+)(.*)
                with: $1$5-$3
      FakeTits:
        selector: $label[text()='Boobs:']]
        postProcess:
          - replace:
              - regex: "Boobs: "
                with: ""
          - map:
              Real/Natural: "No"
              Fake/Enhanced: "Yes"
      CareerLength: $label[text()='Years active:']]/text()
      Aliases:
        selector: //div[@id='bioarea']//h2/text()
        postProcess:
          - replace:
              # Remove the "aka" marker and turn "/" separators into commas.
              - regex: aka
                with: ""
              - regex: '\/'
                with: ","
      Tattoos: $label[text()='Tattoos:']]/text()
      Piercings: $label[text()='Piercings:']]/text()
      Image:
        selector: //div[@id='profimg']//a[@class='img']/@href
        # Alternative selector will use site logo as image if none is found
        # selector: //div[@id='profimg']//a/@href
        postProcess:
          - replace:
              # A javascript:alert(...) href is swapped for the logo path
              # (only relevant with the alternative selector above).
              - regex: javascript\:alert.+;
                with: /images/logo.png
              # Prefix the (site-relative) path with the site root.
              - regex: ^
                with: https://www.babepedia.com
      Details:
        selector: //div[@class="babebanner separate"]/p//text()
        concat: "\n"
        postProcess:
          - replace:
              # Put a blank line before the "Performances" section.
              - regex: '\nPerformances'
                with: "\n\nPerformances"
              # Re-join lines that were split after a connecting word/comma.
              - regex: '(and|with|of|,)\n'
                with: "$1 "
              # Re-attach punctuation that ended up on its own line.
              - regex: '\n(,|\.|\))'
                with: "$1"
# Last Updated January 05, 2022

View File

@@ -0,0 +1,9 @@
# Manifest for the Babepedia scraper package: identity, version and files.
id: Babepedia
name: Babepedia
metadata: {}
# Quoted so the hash-like value is always read as a string.
version: "dd4bcfa"
date: "2024-01-07 01:31:32"
requires: []
# Index file of the repository this package is listed in.
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
# Files that make up the package.
files:
  - Babepedia.yml