This commit is contained in:
Christoph Califice
2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions

View File

@@ -0,0 +1,99 @@
name: TheNude
performerByName:
action: scrapeXPath
queryURL: https://www.thenude.com/index.php?page=search&action=searchModels&__form_name=navbar-search&m_aka=on&m_name={}
scraper: performerSearch
performerByURL:
- action: scrapeXPath
url:
- https://www.thenude.com
scraper: performerScraper
xPathScrapers:
performerSearch:
performer:
# Name: //a[@class="model-name"]/text() Version to get only the name
# Name: //a[@class="model-name"]/../../a/@title Version to get a little info on label/studio as well
Name:
selector: //figcaption/span
postProcess:
- replace:
- regex: "^AKA:"
with: ""
URL: //a[@class="model-name"]/@href
performerScraper:
performer:
Name: //span[@class="model-name"]/text()
URL:
selector: //meta[@itemprop="url"]/@content
Twitter: //a[text()="TWITTER"]/@href
Instagram: //a[text()="INSTAGRAM"]/@href
Birthdate:
selector: //li/span[@class="list-quest"][contains(text(),'Born')]/../text()
postProcess:
- parseDate: January 2006
- parseDate: ??-??-2006
Ethnicity:
selector: //li/span[@class="list-quest"][contains(text(),'Ethnicity')]/../text()
postProcess:
- replace:
- regex: Asian
with: "asian"
- regex: Caucasian
with: "white"
- regex: Black
with: "black"
- regex: Latin
with: "hispanic"
Country:
selector: //span[@itemprop="nationality"]/text()
postProcess:
- replace:
- regex: "United States of America"
with: "United States"
#EyeColor: not listed
Height:
selector: //li/span[@class="list-quest"][contains(text(),'Height')]/../text()
postProcess:
- replace:
- regex: ^(\d+).+$
with: "$1"
Measurements:
selector: //li/span[@class="list-quest"][contains(text(),'Measurements')]/../text()
FakeTits:
selector: //li/span[@class="list-quest"][contains(text(),'Breasts')]/../text()
postProcess:
- replace:
- regex: ^[^\(]+\(([^\)]+)\).*$
with: "$1"
- regex: Fake
with: "Yes"
- regex: Real
with: "No"
CareerLength:
selector: //li/span[@class="list-quest"][contains(text(),'Seen')]/../text()
concat: "-"
Aliases:
selector: //meta[@itemprop="additionalName"]/@content
concat: ", "
Tattoos:
selector: //li/span[@class="list-quest"][contains(text(),'Tattoos')]/../text()
Piercings:
selector: //li/span[@class="list-quest"][contains(text(),'Piercings')]/../text()
Image:
selector: //meta[@itemprop="url"]/@content
postProcess:
- replace:
- regex: https:\/\/www\.thenude\.com\/
with: "https://static.thenude.com/models/"
- regex: \.htm
with: "/medhead.jpg"
Gender:
selector: //meta[@itemprop="gender"]/@content
Details:
selector: //li/span[@class="list-quest"][contains(text(),'Born')]/../text()
postProcess:
- replace:
- regex: ^
with: "Actual scraped Birthdate:"
# Last Updated August 25, 2022