98 lines
3.0 KiB
YAML
98 lines
3.0 KiB
YAML
# yaml-language-server: $schema=../validator/scraper.schema.json
|
|
|
|
# Name of this scraper definition.
name: VRBangers
|
|
|
|
# URL-based scene lookup: a page on any of the listed domains is parsed with
# the `sceneScraper` XPath definitions.
sceneByURL:
  - action: scrapeXPath
    url:
      - blowvr.com
      - vrbangers.com
      - vrbgay.com
      - vrbtrans.com
      - vrconk.com
    scraper: sceneScraper
|
|
# URL-based movie lookup: only vrbangers.com pages are handled, using the
# `movieScraper` XPath definitions.
movieByURL:
  - action: scrapeXPath
    url:
      - vrbangers.com
    scraper: movieScraper
|
|
# Filename-based scene lookup: the file name is rewritten into a scene page
# URL by the replacements below, and that page is parsed with `sceneScraper`.
sceneByFragment:
  action: scrapeXPath
  # url format: https://{studio}.com/video/{title}
  queryURL: "{filename}"
  queryURLReplace:
    # filename format:
    #   {studio}_{title}_{res}_{vrtype}.{ext}
    #     res: oculus|\d+k
    #     vrtype: stuff we do not care about but could contain '_'
    filename:
      # Capture the studio ($1) and the title ($2). The resolution token accepts
      # one or more digits before 'k' so that it agrees with the `\d+k` format
      # documented above (a single `\d` rejects resolutions such as 12k).
      # NOTE(review): blowvr.com is listed under sceneByURL but is not matched
      # here - confirm whether its filenames follow the same pattern.
      - regex: (?i)^(vrbangers|vrbgay|vrbtrans|vrconk)_(.+)_(?:oculus|\d+k)_.+$
        with: https://$1.com/video/$2
      # Titles have spaces replaced with '_' and the url expects '-' instead of spaces
      - regex: _
        with: "-"
  scraper: sceneScraper
|
|
|
|
# XPath definitions referenced by the `scraper:` keys above. Anchors (&name)
# declared in `sceneScraper` are reused through aliases (*name) in
# `movieScraper`, so `sceneScraper` must stay first.
xPathScrapers:
  sceneScraper:
    common:
      # Info container; two alternative page layouts are covered by the union.
      $info: &info //div[starts-with(@class,"video-item__info ")]|//div[@class="single-video-info"]
    scene:
      Title: &titleSel //h1
      Date: &dateAttr
        # Text node that follows the "Release date:" label inside the info container.
        selector: $info//span[text()="Release date:"]/following-sibling::text()
        postProcess:
          - parseDate: Jan 2, 2006
      Details: &detailsAttr
        selector: //div[contains(@class,"second-text")]/div//text()|//div[contains(@class,"single-video-description")]/div//text()
        # All matched text nodes are joined with a single space.
        concat: " "
      Tags:
        Name: //div[@data-testid="video-categories-list"]/a[contains(@href,"category/")]/text()|//div[@class="single-video-categories"]//a[contains(@href,"category/")]/text()
      Performers:
        Name: //div[starts-with(@class, 'video-item__info-starring')]//a/text()|//div[contains(@class, "single-video-info__starring")]//a/text()
      Studio:
        Name: &studioName
          selector: &studioURLSel //meta[@name="dl8-customization-brand-url"]/@content
          postProcess:
            # Drop a leading "//" so that only the bare domain is left ...
            - replace:
                - regex: ^//
                  with: ""
            # ... then translate that domain into the studio's name.
            - map:
                blowvr.com: Blow VR
                vrbangers.com: VR Bangers
                vrbgay.com: VRB Gay
                vrbtrans.com: VRB Trans
                vrconk.com: VR Conk
        URL: &studioURL
          selector: *studioURLSel
          postProcess:
            # Prefix the value with "https:"
            # (presumably the meta content is protocol-relative, e.g. "//domain" - confirm).
            - replace:
                - regex: ^
                  with: "https:"
      Image: &imageSel //meta[@property="og:image"]/@content
      URL: //link[@rel="canonical"]/@href

  movieScraper:
    common:
      $info: *info
    movie:
      Name:
        selector: *titleSel
        postProcess:
          # Prefix the page title with the fixed text "VRBangers - ".
          - replace:
              - regex: ^
                with: "VRBangers - "
      Duration:
        selector: $info//span[contains(text(),"Duration:")]/following-sibling::text()
        postProcess:
          # Rewrite the " min" suffix as ":00" (e.g. "45 min" -> "45:00").
          - replace:
              - regex: \smin
                with: ":00"
      Date: *dateAttr
      Synopsis: *detailsAttr
      Studio:
        Name: *studioName
        URL: *studioURL
      FrontImage: *imageSel
|
|
# Last Updated April 2, 2024
|