# Scraper definition for JapanHDV: XPath-based scene and performer scrapers.
name: JapanHDV

# URL-to-scraper routing. Each entry names the xPathScrapers key to run.
sceneByURL:
  - action: scrapeXPath
    url:
      - japanhdv.com
    scraper: sceneScraper
performerByURL:
  - action: scrapeXPath
    url:
      - japanhdv.com/model
    scraper: performerScraper

xPathScrapers:
  sceneScraper:
    common:
      # Shared prefix for the info panel; referenced below as $movieinfo.
      $movieinfo: //div[@class="pure-u-1-5 hidden-sm hidden-xs"]/div[@class="video-info"]
    scene:
      Title: $movieinfo/p[starts-with(strong,"Title")]/text()
      Details: //div[contains(@class, "video-description")]
      Date:
        selector: //meta[@itemprop="datePublished"]/@content
        postProcess:
          # Layout string is quoted so YAML always treats it as a string.
          - parseDate: "2006-01-02T15:04:05-07:00"
      Performers:
        Name: $movieinfo/p[starts-with(strong,"Actress")]/a
        URL: $movieinfo/p[starts-with(strong,"Actress")]/a/@href
      Tags:
        Name: $movieinfo/p[starts-with(strong,"Categories")]/a
      Image:
        selector: //video[@id="videohtml5tour"]/@poster
        postProcess:
          # Turn a protocol-relative URL ("//host/...") into an https URL.
          - replace:
              - regex: ^//
                with: "https://"
      Studio:
        Name:
          fixed: Japan HDV

  performerScraper:
    performer:
      Name: //div[@class="video-info"]//h2
      Aliases: //div[contains(strong,"Japanese Name")]/following-sibling::div[1]/text()
      Birthdate:
        selector: //div[contains(strong,"Birth date")]/following-sibling::div[1]/text()
        postProcess:
          - parseDate: "January 2, 2006"
      # Ethnicity and Country are both derived from the same "Ethnicity" field
      # of the page, mapped to different output values.
      Ethnicity:
        selector: //div[contains(strong,"Ethnicity")]/following-sibling::div[1]/text()
        postProcess:
          - map:
              Japanese: Asian
      Country:
        selector: //div[contains(strong,"Ethnicity")]/following-sibling::div[1]/text()
        postProcess:
          - map:
              Japanese: Japan
      Height:
        selector: //div[contains(strong,"Height")]/following-sibling::div[1]/text()
        postProcess:
          - replace:
              # Keep only the digits that precede "cm".
              - regex: (\d+)cm.+
                with: $1
              - regex: ^cm.+ # no values
                with: ""
      Measurements:
        # Matches both the "Body" and "Breasts Cup" fields; the matched
        # values are joined with "-" before post-processing.
        selector: //div[contains(strong,"Body") or contains(strong,"Breasts Cup")]/following-sibling::div[1]/text()
        concat: "-"
        postProcess:
          - replace:
              # "B<b>-W<w>-H<h>-<cup>" -> "<b><cup>-<w>-<h>"
              - regex: 'B(\d+)-W(\d+)-H(\d+)-(\D+)'
                with: "$1$4-$2-$3"
              - regex: "N/A"
                with: ""
              # Drop anything that still does not start with a digit.
              - regex: '^\D.+'
                with: ""
      Image:
        selector: //div[@class="thumb"]/img/@src
        postProcess:
          # Turn a protocol-relative URL ("//host/...") into an https URL.
          - replace:
              - regex: ^//
                with: "https://"
      Gender:
        fixed: "Female"
      Details: //div[@id="model"]//p
      HairColor:
        selector: //div[contains(strong,"Hair color")]/following-sibling::div[1]/text()
        postProcess:
          - map:
              N/A: ""
# Last Updated January 01, 2022