# XPath scraper definition for babepedia.com performer pages.
# Provides a name search (performerSearch) and a by-URL scrape (performerScraper).
name: Babepedia

# Search by name: the query replaces {} in queryURL.
performerByName:
  action: scrapeXPath
  queryURL: https://www.babepedia.com/search/{}
  scraper: performerSearch

# Scrape a performer page directly from its URL.
performerByURL:
  - action: scrapeXPath
    url:
      - https://www.babepedia.com
    scraper: performerScraper

xPathScrapers:
  performerSearch:
    performer:
      # Either the result links of a search page, or the <h1> of a bio page
      # (second branch of the union).
      Name:
        selector: //span[@class="results"]//a[contains(@href, '/babe/')]|//div[@id="bioarea"]/h1
        postProcess:
          - replace:
              - regex: "aka(\\s+[A-Z]+)"
                with: " aka $1"
      URL:
        selector: //span[@class="results"]//a[contains(@href, '/babe/')]/@href|//meta[@property='og:url']/@content
        postProcess:
          - replace:
              # Prefix every value with the site root ...
              - regex: ^
                with: https://www.babepedia.com
              # ... then collapse the doubled prefix produced when the value
              # was already an absolute URL.
              - regex: https:\/\/www\.babepedia\.comhttps:\/\/www\.babepedia\.com
                with: https://www.babepedia.com

  performerScraper:
    common:
      # The predicate bracket is deliberately left open here; every use below
      # appends its own condition and closes it with "]]".
      $label: //div[@id='bioarea']//ul/li[span[@class='label']
    performer:
      Name: //div[@id="bioarea"]/h1
      Gender:
        fixed: "Female"
      URL: //head/meta[@property="og:url"]/@content
      Twitter:
        selector: //div[@id='socialicons']//a[img[@alt='Twitter account']]/@href
        postProcess:
          - replace:
              # Strip the site prefix, leaving only the handle. Accepts
              # http/https and an optional "www."; dots are escaped so they
              # match literally.
              - regex: 'https?://(?:www\.)?twitter\.com/'
                with: ""
      Instagram:
        selector: //div[@id='socialicons']//a[img[@alt='Instagram account']]/@href
        postProcess:
          - replace:
              # Same prefix stripping as for Twitter.
              - regex: 'https?://(?:www\.)?instagram\.com/'
                with: ""
      Birthdate:
        # All text nodes of the "Born:" item (label included), joined by spaces.
        selector: $label[text()='Born:']]//text()
        concat: " "
        postProcess:
          - replace:
              # Keep group 5 (day digits), group 10 and group 12 (trailing
              # digits) so the result fits the parseDate layout below.
              - regex: (\w+:)(\s)(\w+)(\s)(\d+)(\w+)(\s)(\w+)(\s)(\w+)(\s)(\d+)
                with: $5 $10 $12
          - parseDate: 2 January 2006
      DeathDate:
        selector: $label[text()='Died:']]/text()
        postProcess:
          - replace:
              # Drop the ordinal suffix from the day number.
              - regex: (\d+)(st|nd|rd|th)
                with: "$1"
              # Replace " of " and a trailing "(age ..." with a single space.
              - regex: (\sof\s)|(\(age.+)
                with: " "
          - parseDate: Monday 2 January 2006
      Ethnicity:
        selector: $label[text()='Ethnicity:']]/a/text()|$label[text()='Ethnicity:']]/text()
        postProcess:
          - map:
              Asian: asian
              Caucasian: white
              Black: black
              Latin: hispanic
      # NOTE(review): unlike every other label test in this file, 'Birthplace'
      # has no trailing colon - confirm against the site's markup.
      Country: $label[text()='Birthplace']]/a[contains(@href,'topbabespercountry')]/text()
      EyeColor:
        selector: $label[text()='Eye color:']]
        postProcess:
          - replace:
              # The selector returns the whole item, so remove the label text.
              - regex: "Eye color: "
                with: ""
      HairColor:
        selector: $label[text()='Hair color:']]
        postProcess:
          - replace:
              # The selector returns the whole item, so remove the label text.
              - regex: "Hair color: "
                with: ""
      Height:
        selector: $label[text()='Height:']]/text()
        postProcess:
          - replace:
              # Keep only the number that precedes "cm".
              - regex: ^.*\s(\d+)\scm.*$
                with: $1
      Weight:
        selector: $label[text()='Weight:']]/text()
        postProcess:
          - replace:
              # Keep only the number that precedes "kg".
              - regex: ^.*\s(\d+)\skg.*$
                with: $1
      Measurements:
        # Measurements and bra/cup size are joined with "|" and then
        # recombined by the regex below.
        selector: $label[text()='Measurements:']]/text()|$label[text()='Bra/cup size:']]/text()
        concat: "|"
        postProcess:
          - replace:
              - regex: (\d*)([a-zA-Z-]*)(\d*-\d*)(.+?)([a-zA-Z]+)(.*)
                with: $1$5-$3
      FakeTits:
        selector: $label[text()='Boobs:']]
        postProcess:
          - replace:
              - regex: "Boobs: "
                with: ""
          - map:
              Real/Natural: "No"
              Fake/Enhanced: "Yes"
      CareerLength: $label[text()='Years active:']]/text()
      Aliases:
        selector: //div[@id='bioarea']//h2/text()
        postProcess:
          - replace:
              # Remove the "aka" marker and turn "/" separators into commas.
              - regex: aka
                with: ""
              - regex: '\/'
                with: ","
      Tattoos: $label[text()='Tattoos:']]/text()
      Piercings: $label[text()='Piercings:']]/text()
      Image:
        selector: //div[@id='profimg']//a[@class='img']/@href
        # Alternative selector will use site logo as image if none is found
        # selector: //div[@id='profimg']//a/@href
        postProcess:
          - replace:
              # A javascript:alert(...) href is swapped for the site logo path.
              - regex: javascript\:alert.+;
                with: /images/logo.png
              # Prefix the path with the site root.
              - regex: ^
                with: https://www.babepedia.com
      Details:
        selector: //div[@class="babebanner separate"]/p//text()
        concat: "\n"
        postProcess:
          - replace:
              # Put a blank line before the "Performances" section.
              - regex: '\nPerformances'
                with: "\n\nPerformances"
              # Re-join lines that were split after a connecting word or comma.
              - regex: '(and|with|of|,)\n'
                with: "$1 "
              # Re-attach punctuation that ended up at the start of a line.
              - regex: '\n(,|\.|\))'
                with: "$1"
# Last Updated January 05, 2022