# XPath-based performer scraper definition for xslist.org.
# Provides a by-name search (performerByName) and a by-URL profile scrape
# (performerByURL); both delegate to the XPath scrapers defined under
# `xPathScrapers` below.
name: "XsList (JAV)"

# Name search: fetches queryURL and hands the page to `performerSearch`.
# NOTE(review): `{}` is presumably the placeholder for the search term - confirm.
performerByName:
  action: scrapeXPath
  queryURL: https://xslist.org/search?query={}&lg=en
  scraper: performerSearch

# Profile scrape: URLs containing one of the `url` fragments are handled by
# `performerScraper`.
performerByURL:
  - action: scrapeXPath
    url:
      - xslist.org/en/model/
    scraper: performerScraper

xPathScrapers:
  # Search results: every <li class="clearfix"> contributes one performer,
  # taking the name from the link's title and the profile URL from its href.
  performerSearch:
    performer:
      Name: //li[@class="clearfix"]/h3/a/@title
      URL:
        selector: //li[@class="clearfix"]/h3/a/@href

  # Profile page: extracts the individual performer fields.
  performerScraper:
    performer:
      Name:
        selector: //span[@itemprop="name"]/text()
        # Uncomment below to convert to Surname Name (JavLibrary compatible)
        #postProcess:
        #  - replace:
        #      - regex: (.+)(\s)(.+)
        #        with: $3$2$1
      Aliases:
        # Union of the itemprop="additionalName" text and the first <h2> text
        # under div#layout/div; all matches are joined with ", ".
        selector: //span[@itemprop="additionalName"]/text()|//div[@id="layout"]/div/h2[1]/text()
        concat: ", "
        postProcess:
          - replace:
              # Text that contains a two-word Latin-letter segment followed
              # later by a run of Han/Hiragana/Katakana characters is rewritten
              # to "<that run>, <leading text>".
              - regex: "(.+)( \\b[a-zA-Z]+\\s\\b[a-zA-Z]+)(.+?)([\\p{Han}\\p{Hiragana}\\p{Katakana}ー]+)(.+)"
                with: $4, $1
              # First alternative: "... <Japanese-script run> ... Profile ..."
              # is reduced to the run ($2). Second alternative, (.+), keeps any
              # other text unchanged ($6).
              - regex: "(\\b.+?|)([\\p{Han}\\p{Hiragana}\\p{Katakana}ー]+)(.+)(Profile)(.+)|(.+)"
                with: $2$6
              # Strip a leading or trailing comma. The replacement is written as
              # an explicit empty string to match the other `with: ""` entries
              # in this file.
              - regex: ^,|,$
                with: ""
      URL: //head/meta[@property="og:url"]/@content
      Birthdate:
        # Text nodes of the first <p> that mention "Born" and are not "n/a".
        selector: //div[@id="layout"]/div/p[1]/text()[not(contains(.,"n/a")) and contains(.,"Born")]
        postProcess:
          - replace:
              # Drop the "Born: " label, keeping the date text.
              - regex: (Born:\s)(.+)
                with: $2
          # Date layout of the remaining text, e.g. "August 20, 2020".
          - parseDate: January 2, 2006
      Height:
        selector: //span[@itemprop="height"]/text()[not(contains(.,"n/a"))]
        postProcess:
          - replace:
              # Remove the "cm" unit suffix.
              - regex: "cm"
                with: ""
      Measurements:
        # Union of the "Measurements" and "Cup Size" text nodes of the first
        # <p> (each skipped when it contains "n/a"), joined with "|".
        selector: //div[@id="layout"]/div/p[1]/text()[not(contains(.,"n/a")) and contains(.,"Measurements")]|//div[@id="layout"]/div/p[1]/text()[not(contains(.,"n/a")) and contains(.,"Cup Size")]
        concat: "|"
        postProcess:
          - replace:
              # When all three numbers and the cup size are present, reorder
              # them to "<first number><cup>-<second number>-<third number>".
              - regex: (.+:\s\w)(\d*)(\s\/\s.?)(\d*)(\s\/\s.?)(\d*)(.+:\s)(\w*)(\s.*)
                with: $2$8-$4-$6
              # The remaining rules remove leftover labels and whitespace and
              # turn "/" separators into "-".
              # NOTE(review): presumably a fallback for text the first regex
              # did not match - confirm against live pages.
              - regex: "Measurements: B|W|H|\\s"
                with: ""
              - regex: "CupSize:|Cup"
                with: ""
              - regex: \/
                with: "-"
      CareerLength:
        selector: //div[@id="layout"]/div/p[1]/text()[not(contains(.,"n/a")) and contains(.,"AV Activity")]
        postProcess:
          - replace:
              # The greedy (.+) means $2 is the LAST four-digit run in the
              # text; everything before it (label and any earlier year) is
              # discarded.
              # NOTE(review): confirm that dropping the start year is intended.
              - regex: (.+)(\d{4})
                with: $2
      # Gallery JPG links whose data-height exceeds data-width (portrait
      # orientation), unioned with the profile image source.
      Image: //a[@class="gallery-item gallery-jpg" and number(@data-height)>number(@data-width)][1]/@href|//img[@class='profile_img']/@src
      # Constant values applied to every performer scraped from this site.
      Ethnicity:
        fixed: "asian"
      Country:
        fixed: "Japan"
      Gender:
        fixed: "Female"

# Last Updated August 20, 2020