# File-viewer header captured with this listing (describes the original upload): 62 lines, 2.2 KiB, YAML
# Scraper definition for Jav.Land: one XPath scraper for scene pages and one
# for movie pages. Selectors shared by both are declared once with a YAML
# anchor (&name) and reused through aliases (*name).
name: "Jav.Land"

# Route jav.land URLs to the XPath scrapers defined under xPathScrapers.
sceneByURL:
  - action: scrapeXPath
    url:
      - jav.land
    scraper: sceneScraper
movieByURL:
  - action: scrapeXPath
    url:
      - jav.land
    scraper: movieScraper

xPathScrapers:
  sceneScraper:
    common:
      # Table whose class attribute starts with "videotextlist "; every
      # selector beginning with $info is rooted at this table.
      $info: &infoSel //table[starts-with(@class,"videotextlist ")]
    scene:
      # Each $info selector finds the cell holding the <strong> label and
      # reads the cell(s) that follow it in the same row.
      Title: &titleSel $info//td[strong[text()="DVD ID:"]]/following-sibling::td/text()
      Date: &dateAttr $info//td[strong[text()="Release Date:"]]/following-sibling::td/text()
      Details: &detailsSel //div[@class="col-xs-12"]/strong/text()
      Tags:
        Name: $info//td[strong[text()="Genre(s):"]]/following-sibling::td//a/text()
      Performers:
        Name: $info//td[strong[text()="Cast:"]]/following-sibling::td//a/text()
      Studio:
        # Only the first link in the "Maker:" row is used.
        Name: &studioName $info//td[strong[text()="Maker:"]]/following-sibling::td//a[1]/text()
      Image: &imageSel //img[@class="img-responsive"]/@src
      Movies:
        # Automatically create a movie that uses the scene Title as its name.
        # The movie created this way has no Studio or FrontImage, because of
        # the limits of scraping from a scene.
        # To add the missing Studio and FrontImage, open that movie and scrape
        # it again with its URL.
        Name: *titleSel
        Date: *dateAttr
        # Anchored so movieScraper reuses the exact same selector.
        Director: &directorSel $info//td[strong[text()="Director:"]]/following-sibling::td//a[1]/text()
        Duration: &durationAttr
          selector: $info//td[strong[text()="Length:"]]/following-sibling::td/text()
          postProcess:
            - replace:
                # Remove any parenthesised text, then rewrite the literal
                # "min" as ":00".
                - regex: \(.+\)
                  with: ""
                - regex: min
                  with: ":00"
        URL:
          # Read the inline script that mentions "videoid", capture the quoted
          # value assigned to it, and build the movie page URL from that value.
          selector: //script[contains(text(),"videoid")]/text()
          postProcess:
            - replace:
                - regex: '.+videoid\s*=\s*"([^"]+).+'
                  with: "https://jav.land/en/movie/$1.html"
        Synopsis: *detailsSel

  movieScraper:
    common:
      $info: *infoSel
    movie:
      # All fields reuse the selectors anchored in sceneScraper above.
      Name: *titleSel
      Duration: *durationAttr
      Date: *dateAttr
      Director: *directorSel
      Studio:
        Name: *studioName
      Synopsis: *detailsSel
      FrontImage: *imageSel
# Last Updated October 07, 2021