# yaml-language-server: $schema=../validator/scraper.schema.json
# XPath scraper definition for the VR Bangers family of sites.
# Scenes can be resolved from a page URL or from a file name; movies are
# resolved from a page URL only.
name: "VRBangers"

# Scene pages on any of these hosts are parsed with sceneScraper.
sceneByURL:
  - action: scrapeXPath
    url:
      - blowvr.com
      - vrbangers.com
      - vrbgay.com
      - vrbtrans.com
      - vrconk.com
    scraper: sceneScraper

# Movie pages are only handled for vrbangers.com.
movieByURL:
  - action: scrapeXPath
    url:
      - vrbangers.com
    scraper: movieScraper

# Builds a scene page URL from the file name, then parses it with sceneScraper.
sceneByFragment:
  action: scrapeXPath
  # url format: https://{studio}.com/video/{title}
  queryURL: "{filename}"
  queryURLReplace:
    # filename format:
    # {studio}_{title}_{res}_{vrtype}.{ext}
    # res: oculus|\d+k
    # vrtype: stuff we do not care about but could contain '_'
    filename:
      # $1 = studio, $2 = title. The match is case-insensitive, and the title
      # group is greedy, so it extends up to the last "_{res}_" token.
      # The resolution accepts one or more digits before "k" (8k, 12k, ...),
      # as documented above.
      # NOTE(review): blowvr is listed under sceneByURL but not in this
      # pattern - presumably intentional; confirm.
      - regex: (?i)^(vrbangers|vrbgay|vrbtrans|vrconk)_(.+)_(?:oculus|\d+k)_.+$
        with: https://$1.com/video/$2
      # Titles have spaces replaced with '_' and the url expects '-' instead of spaces
      - regex: _
        with: "-"
  scraper: sceneScraper

xPathScrapers:
  # Extracts scene metadata. The anchors defined here (&info, &titleSel,
  # &dateAttr, &detailsAttr, &studioName, &studioURLSel, &studioURL,
  # &imageSel) are reused by movieScraper below.
  sceneScraper:
    common:
      # Info container; two alternatives are accepted (the pages apparently
      # come in two layouts).
      $info: &info //div[starts-with(@class,"video-item__info ")]|//div[@class="single-video-info"]
    scene:
      Title: &titleSel //h1
      Date: &dateAttr
        # Text node that follows the "Release date:" label inside $info.
        selector: $info//span[text()="Release date:"]/following-sibling::text()
        postProcess:
          # Layout of the date text, e.g. "Apr 2, 2024".
          - parseDate: Jan 2, 2006
      Details: &detailsAttr
        selector: //div[contains(@class,"second-text")]/div//text()|//div[contains(@class,"single-video-description")]/div//text()
        # All matched text nodes are joined with a single space.
        concat: " "
      Tags:
        Name: //div[@data-testid="video-categories-list"]/a[contains(@href,"category/")]/text()|//div[@class="single-video-categories"]//a[contains(@href,"category/")]/text()
      Performers:
        Name: //div[starts-with(@class, 'video-item__info-starring')]//a/text()|//div[contains(@class, "single-video-info__starring")]//a/text()
      Studio:
        Name: &studioName
          # Brand URL taken from a meta tag; the post-processing below implies
          # a protocol-relative value such as "//vrbangers.com".
          selector: &studioURLSel //meta[@name="dl8-customization-brand-url"]/@content
          postProcess:
            # Strip the leading "//" so the bare host can be mapped to a name.
            - replace:
                - regex: ^//
                  with: ""
            - map:
                blowvr.com: Blow VR
                vrbangers.com: VR Bangers
                vrbgay.com: VRB Gay
                vrbtrans.com: VRB Trans
                vrconk.com: VR Conk
        URL: &studioURL
          selector: *studioURLSel
          postProcess:
            # Prefix the scheme to the same meta tag value.
            - replace:
                - regex: ^
                  with: "https:"
      Image: &imageSel //meta[@property="og:image"]/@content
      URL: //link[@rel="canonical"]/@href

  # Extracts movie metadata, reusing the scene selectors through aliases.
  movieScraper:
    common:
      $info: *info
    movie:
      Name:
        selector: *titleSel
        postProcess:
          # Prefix the page title with the studio name.
          - replace:
              - regex: ^
                with: "VRBangers - "
      Duration:
        # Text node that follows the "Duration:" label inside $info.
        selector: $info//span[contains(text(),"Duration:")]/following-sibling::text()
        postProcess:
          # Turns a whitespace + "min" suffix into ":00", e.g. "45 min" -> "45:00".
          - replace:
              - regex: \smin
                with: ":00"
      Date: *dateAttr
      Synopsis: *detailsAttr
      Studio:
        Name: *studioName
        URL: *studioURL
      FrontImage: *imageSel
# Last Updated April 2, 2024