# XPath scraper definition for povr.com and the sites routed to the
# "premium" scraper below (brasilvr.com, milfvr.com, tranzvr.com, wankzvr.com).
#
# YAML anchors (&name) declared in sceneScraperPremium are reused through
# aliases (*name) by sceneScraper and movieScraper, so a selector or
# post-processing chain only has to be maintained in one place.
name: POVR

# URL fragment -> scraper routing for scenes.
sceneByURL:
  - action: scrapeXPath
    url:
      - povr.com
    scraper: sceneScraper
  - action: scrapeXPath
    url:
      - brasilvr.com
      - milfvr.com
      - tranzvr.com
      - wankzvr.com
    scraper: sceneScraperPremium

# URL fragment -> scraper routing for movies.
movieByURL:
  - action: scrapeXPath
    url:
      - wankzvr.com
    scraper: movieScraper

xPathScrapers:
  # Page layout shared by brasilvr/milfvr/tranzvr/wankzvr.
  sceneScraperPremium:
    common:
      $info: &infoSel //div[@class="detail"]
      # The anchor is on the raw XPath string, so *urlSel can be used by
      # scrapers that do not declare their own `common` section.
      $url: &urlSel //link[@rel="canonical"]/@href
    scene:
      Title: &titleSel //div[@class="detail__header detail__header-lg"]/h1
      Date: &dateAttr
        selector: $info//span[@class="detail__date"]/text()
        postProcess:
          # Layout for dates written like "28 June, 2023".
          - parseDate: 2 January, 2006
      Details: &detailsAttr
        selector: //div[@class="detail__txt detail__txt-show_lg"]/text()|//span[@class="more__body"]/text()
        concat: " "
      Tags:
        Name: $info//div[@class="tag-list__body"]//a/text()
      Performers:
        Name: //div[@class="detail__inf detail__inf-align_right"]/div[@class="detail__models"]/a/text()
      Image: &imageAttr
        selector: //meta[@property="og:image"]/@content|//div[@class="photo-strip__body"]/div[2]/@data-src
        postProcess:
          # Dots in the patterns below are escaped so they only match a
          # literal "." and not any character.
          - replace:
              - regex: medium\.jpg
                with: large.jpg
              # TranzVR defaults to smaller covers, but we can grab a bigger one
              - regex: 472/cover\.jpg
                with: 680/cover.jpg
              # All of these domains give 403 errors when saving the scraped image,
              # but povr.com has the same images and is totally cool with our scraping
              - regex: cdns-i\.wankzvr\.com
                with: images.povr.com/wvr
              - regex: images\.tranzvr\.com
                with: images.povr.com/tvr
              - regex: cdns-i\.milfvr\.com
                with: images.povr.com/mvr
              - regex: cdns-i\.brasilvr\.com
                with: images.povr.com
      Studio: &studioAttr
        Name:
          selector: *urlSel
          postProcess:
            # Reduce the canonical URL to its host (optional "www." removed),
            # then map the host to the studio's display name.
            - replace:
                - regex: ^.*//(?:www\.)?([^/]*).*$
                  with: $1
            - map:
                brasilvr.com: BrasilVR
                milfvr.com: MilfVR
                tranzvr.com: TranzVR
                wankzvr.com: WankzVR
      Code: &codeAttr
        selector: *urlSel
        postProcess:
          # Keep only the digits that follow the last "-" at the end of the URL.
          - replace:
              - regex: ^.*-(\d+)$
                with: $1

  # Page layout used by povr.com itself.
  sceneScraper:
    scene:
      Title:
        selector: //h1[contains(@class, "heading-title")]/text()
      Date:
        selector: //p[contains(@class, 'player__date')]
        postProcess:
          # Strip everything before the trailing "<day> <month>, <year>" part.
          - replace:
              - regex: .*\s(\d+\s[a-zA-Z]+,\s\d+)$
                with: $1
          - parseDate: 2 January, 2006
      Performers:
        Name: //li[span[contains(text(), "Pornstars:")]]/following-sibling::li/a/text()
      Tags:
        Name: //li[span[contains(text(), "Tags:")]]/following-sibling::li/a/text()
      Details:
        selector: //div[contains(@class, "player__description")]/p/text()
      Image: *imageAttr
      Studio:
        Name:
          selector: //span[contains(text(), "Studio:")]/following-sibling::a/text()
      Code: *codeAttr

  # Movie variant; routed from wankzvr.com URLs only (see movieByURL).
  movieScraper:
    common:
      # Required because *dateAttr and the Duration selector reference $info.
      $info: *infoSel
    movie:
      Name:
        selector: *titleSel
        postProcess:
          # Prefix the title with the studio name.
          - replace:
              - regex: ^
                with: "WankzVR - "
      Duration:
        selector: $info//span[@class="time"]/text()
        postProcess:
          # Replace the " min" suffix with ":00", e.g. "45 min" -> "45:00".
          - replace:
              - regex: \smin
                with: ":00"
      Date: *dateAttr
      Studio: *studioAttr
      Synopsis: *detailsAttr
      FrontImage: *imageAttr
# Last Updated June 28, 2023