# Scraper definition for jav.land: URL-matched scene and movie scrapers,
# both backed by XPath selectors. Selectors shared between the two scrapers
# are anchored once (&name) where first used and aliased (*name) afterwards.
name: "Jav.Land"

sceneByURL:
  - action: scrapeXPath
    url:
      - jav.land
    scraper: sceneScraper

movieByURL:
  - action: scrapeXPath
    url:
      - jav.land
    scraper: movieScraper

xPathScrapers:
  sceneScraper:
    common:
      # $info is the table whose class attribute starts with "videotextlist ";
      # most selectors below are written relative to it.
      $info: &infoSel '//table[starts-with(@class,"videotextlist ")]'
    scene:
      # Each $info field is read from the cell following the cell whose
      # <strong> label matches the quoted text.
      Title: &titleSel '$info//td[strong[text()="DVD ID:"]]/following-sibling::td/text()'
      Date: &dateAttr '$info//td[strong[text()="Release Date:"]]/following-sibling::td/text()'
      Details: &detailsSel '//div[@class="col-xs-12"]/strong/text()'
      Tags:
        Name: '$info//td[strong[text()="Genre(s):"]]/following-sibling::td//a/text()'
      Performers:
        Name: '$info//td[strong[text()="Cast:"]]/following-sibling::td//a/text()'
      Studio:
        Name: &studioName '$info//td[strong[text()="Maker:"]]/following-sibling::td//a[1]/text()'
      Image: &imageSel '//img[@class="img-responsive"]/@src'
      Movies:
        # Automatically create a movie that uses the scene Title as its name.
        # The movie will lack Studio and FrontImage due to scraping limits.
        # To fill in the missing Studio and FrontImage, open that movie and
        # scrape it again with the URL.
        Name: *titleSel
        Date: *dateAttr
        Director: &directorSel '$info//td[strong[text()="Director:"]]/following-sibling::td//a[1]/text()'
        Duration: &durationAttr
          selector: '$info//td[strong[text()="Length:"]]/following-sibling::td/text()'
          postProcess:
            # First drop any parenthesised text, then replace "min" with ":00".
            - replace:
                - regex: '\(.+\)'
                  with: ""
                - regex: 'min'
                  with: ":00"
        URL:
          # Capture the quoted value assigned to "videoid" in the inline
          # script and substitute it into the movie page URL.
          selector: '//script[contains(text(),"videoid")]/text()'
          postProcess:
            - replace:
                - regex: '.+videoid\s*=\s*"([^"]+).+'
                  with: "https://jav.land/en/movie/$1.html"
        Synopsis: *detailsSel

  movieScraper:
    common:
      $info: *infoSel
    movie:
      # Every field reuses a selector anchored in sceneScraper above.
      Name: *titleSel
      Duration: *durationAttr
      Date: *dateAttr
      Director: *directorSel
      Studio:
        Name: *studioName
      Synopsis: *detailsSel
      FrontImage: *imageSel

# Last Updated October 07, 2021