100 lines
3.4 KiB
YAML
100 lines
3.4 KiB
YAML
# Scraper identity.
name: TheNude

# Lookup by performer name: the search page at queryURL is fetched and handed
# to the `performerSearch` XPath scraper defined under xPathScrapers.
# NOTE(review): `{}` at the end of queryURL is presumably where the search
# term is substituted -- confirm against the scraper engine's documentation.
performerByName:
  scraper: performerSearch
  action: scrapeXPath
  queryURL: https://www.thenude.com/index.php?page=search&action=searchModels&__form_name=navbar-search&m_aka=on&m_name={}
|
|
# Lookup by performer URL: a single XPath-based entry that is tied to the
# thenude.com address below and delegates to the `performerScraper` definition
# under xPathScrapers.
performerByURL:
  - scraper: performerScraper
    action: scrapeXPath
    url:
      - https://www.thenude.com
|
|
xPathScrapers:
  # ---------------------------------------------------------------------
  # performerSearch: used by performerByName on the search-results page.
  # ---------------------------------------------------------------------
  performerSearch:
    performer:
      URL: //a[@class="model-name"]/@href
      # Alternative Name selectors, kept for reference:
      # Name: //a[@class="model-name"]/text() Version to get only the name
      # Name: //a[@class="model-name"]/../../a/@title Version to get a little info on label/studio as well
      Name:
        selector: //figcaption/span
        postProcess:
          - replace:
              # Drop a leading "AKA:" marker from the caption text.
              - regex: '^AKA:'
                with: ''

  # ---------------------------------------------------------------------
  # performerScraper: used by performerByURL on a performer page.
  # Most fields read the text that sits next to a <span class="list-quest">
  # label inside an <li>; the rest come from itemprop metadata.
  # ---------------------------------------------------------------------
  performerScraper:
    performer:
      # --- Identity ---
      Name: //span[@class="model-name"]/text()
      Aliases:
        selector: //meta[@itemprop="additionalName"]/@content
        # Multiple matches are joined with a comma and a space.
        concat: ', '
      Gender:
        selector: //meta[@itemprop="gender"]/@content

      # --- Links ---
      URL:
        selector: //meta[@itemprop="url"]/@content
      Twitter: //a[text()="TWITTER"]/@href
      Instagram: //a[text()="INSTAGRAM"]/@href
      Image:
        # Derived from the page URL: the site prefix is swapped for the
        # static "models/" prefix and ".htm" becomes "/medhead.jpg".
        selector: //meta[@itemprop="url"]/@content
        postProcess:
          - replace:
              - regex: 'https:\/\/www\.thenude\.com\/'
                with: 'https://static.thenude.com/models/'
              - regex: '\.htm'
                with: '/medhead.jpg'

      # --- Birth / origin ---
      Birthdate:
        selector: //li/span[@class="list-quest"][contains(text(),'Born')]/../text()
        postProcess:
          # Two date layouts are applied one after the other.
          # NOTE(review): the purpose of the second layout is not evident
          # from this file -- confirm before changing or removing it.
          - parseDate: 'January 2006'
          - parseDate: '??-??-2006'
      Details:
        # Same "Born" text as Birthdate, but unparsed and with a fixed label
        # prepended (the regex "^" matches the start of the value).
        selector: //li/span[@class="list-quest"][contains(text(),'Born')]/../text()
        postProcess:
          - replace:
              - regex: '^'
                with: 'Actual scraped Birthdate:'
      Country:
        selector: //span[@itemprop="nationality"]/text()
        postProcess:
          - replace:
              # Shorten the long country name.
              - regex: 'United States of America'
                with: 'United States'
      Ethnicity:
        selector: //li/span[@class="list-quest"][contains(text(),'Ethnicity')]/../text()
        postProcess:
          - replace:
              # Rewrites the site's labels to the lowercase values below;
              # the substitutions are listed in this order in the original.
              - regex: 'Asian'
                with: 'asian'
              - regex: 'Caucasian'
                with: 'white'
              - regex: 'Black'
                with: 'black'
              - regex: 'Latin'
                with: 'hispanic'
      #EyeColor: not listed

      # --- Body ---
      Height:
        selector: //li/span[@class="list-quest"][contains(text(),'Height')]/../text()
        postProcess:
          - replace:
              # Keep the leading digits, drop whatever follows them.
              - regex: '^(\d+).+$'
                with: '$1'
      Measurements:
        selector: //li/span[@class="list-quest"][contains(text(),'Measurements')]/../text()
      FakeTits:
        selector: //li/span[@class="list-quest"][contains(text(),'Breasts')]/../text()
        postProcess:
          - replace:
              # 1) reduce the value to the text inside the parentheses,
              # 2) rewrite "Fake" to "Yes" and "Real" to "No".
              # The replacements stay quoted so YAML reads them as strings,
              # not booleans.
              - regex: '^[^\(]+\(([^\)]+)\).*$'
                with: '$1'
              - regex: 'Fake'
                with: 'Yes'
              - regex: 'Real'
                with: 'No'
      Tattoos:
        selector: //li/span[@class="list-quest"][contains(text(),'Tattoos')]/../text()
      Piercings:
        selector: //li/span[@class="list-quest"][contains(text(),'Piercings')]/../text()

      # --- Career ---
      CareerLength:
        selector: //li/span[@class="list-quest"][contains(text(),'Seen')]/../text()
        # Multiple matches are joined with a hyphen.
        concat: '-'
|
|
|
|
# Last Updated August 25, 2022
|