# XPath scraper definition for JavHub: scrape a scene from its page URL,
# search scenes by name, and re-scrape a scene from a query fragment.
name: JavHub

# Scene pages whose URL contains javhub.com are handled by sceneScraper.
sceneByURL:
  - action: scrapeXPath
    url:
      - javhub.com
    scraper: sceneScraper

# Name search: {} is the placeholder for the search term in the query URL.
sceneByName:
  action: scrapeXPath
  queryURL: https://tour.javhub.com/search?s={}
  scraper: sceneSearch

# Fragment lookup: {url} is filled from the fragment being queried
# (presumably the URL field of a picked search result - confirm against
# the scraper documentation), then scraped like a normal scene page.
sceneByQueryFragment:
  action: scrapeXPath
  queryURL: "{url}"
  scraper: sceneScraper

xPathScrapers:
  # Full scene page.
  sceneScraper:
    scene:
      Title: //h1[@class="title"]
      URL: //input[starts-with(@id,"copy-url")]/@value
      Date:
        selector: //div[@class="container content-details-wrap"]//span[@class="pub-date"]/text()
        # Anchored as ppDate so the search scraper below can reuse the same steps.
        postProcess: &ppDate
          # Drop everything before the trailing "<Month> <day>, <year>" part.
          - replace:
              - regex: .+\s+([a-zA-Z]+\s+\d+,\s\d+)
                with: $1
          # Date layout the extracted text is parsed with.
          - parseDate: January 02, 2006
      Details: //p[@class="MsoNormal"]
      Image: //video/@poster
      Performers:
        Name: //div[@class="model-wrap"]//h5
      Studio:
        Name:
          fixed: JavHub

  # Search results page (one scene per content item).
  sceneSearch:
    common:
      # ignore search results that have join links (https://tour.javhub.com/join)
      $content: //div[@class="content-item"][div[a[not(@href="https://tour.javhub.com/join")]]]
    scene:
      Title: $content//h3[@class="title"]
      URL: $content//h3[@class="title"]/a/@href
      Date:
        selector: $content//span[@class="pub-date"]/text()
        postProcess: *ppDate
      Image:
        selector: $content//a/@data-images
        postProcess:
          - replace:
              # Keep only the first image URL that ends in 01.jpg.
              - regex: '^.+(https:[^&]+01\.jpg).*'
                with: $1
              # Turn escaped "\/" sequences back into plain "/".
              - regex: '\\/'
                with: "/"
      # Show the duration in Details to avoid false matches:
      # there are duplicate scenes and scenes with identical titles.
      Details:
        selector: $content//span[@class="video-duration"]/text()
        postProcess:
          # Prefix the extracted duration text with a label.
          - replace:
              - regex: ^
                with: "Duration "
# Last Updated February 26, 2022