This commit is contained in:
Christoph Califice
2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions

View File

@@ -0,0 +1,97 @@
# yaml-language-server: $schema=../validator/scraper.schema.json
name: "VRBangers"
sceneByURL:
- action: scrapeXPath
url:
- blowvr.com
- vrbangers.com
- vrbgay.com
- vrbtrans.com
- vrconk.com
scraper: sceneScraper
movieByURL:
- action: scrapeXPath
url:
- vrbangers.com
scraper: movieScraper
sceneByFragment:
action: scrapeXPath
# url format: https://{studio}.com/video/{title}
queryURL: "{filename}"
queryURLReplace:
# filename format:
# {studio}_{title}_{res}_{vrtype}.{ext}
# res: oculus|\d+k
# vrtype: stuff we do not care about but could contain '_'
filename:
- regex: (?i)^(vrbangers|vrbgay|vrbtrans|vrconk)_(.+)_(?:oculus|\dk)_.+$
with: https://$1.com/video/$2
# Titles have spaces replaced with '_' and the url expects '-' instead of spaces
- regex: _
with: "-"
scraper: sceneScraper
xPathScrapers:
sceneScraper:
common:
$info: &info //div[starts-with(@class,"video-item__info ")]|//div[@class="single-video-info"]
scene:
Title: &titleSel //h1
Date: &dateAttr
selector: $info//span[text()="Release date:"]/following-sibling::text()
postProcess:
- parseDate: Jan 2, 2006
Details: &detailsAttr
selector: //div[contains(@class,"second-text")]/div//text()|//div[contains(@class,"single-video-description")]/div//text()
concat: " "
Tags:
Name: //div[@data-testid="video-categories-list"]/a[contains(@href,"category/")]/text()|//div[@class="single-video-categories"]//a[contains(@href,"category/")]/text()
Performers:
Name: //div[starts-with(@class, 'video-item__info-starring')]//a/text()|//div[contains(@class, "single-video-info__starring")]//a/text()
Studio:
Name: &studioName
selector: &studioURLSel //meta[@name="dl8-customization-brand-url"]/@content
postProcess:
- replace:
- regex: ^//
with: ""
- map:
blowvr.com: Blow VR
vrbangers.com: VR Bangers
vrbgay.com: VRB Gay
vrbtrans.com: VRB Trans
vrconk.com: VR Conk
URL: &studioURL
selector: *studioURLSel
postProcess:
- replace:
- regex: ^
with: "https:"
Image: &imageSel //meta[@property="og:image"]/@content
URL: //link[@rel="canonical"]/@href
movieScraper:
common:
$info: *info
movie:
Name:
selector: *titleSel
postProcess:
- replace:
- regex: ^
with: "VRBangers - "
Duration:
selector: $info//span[contains(text(),"Duration:")]/following-sibling::text()
postProcess:
- replace:
- regex: \smin
with: ":00"
Date: *dateAttr
Synopsis: *detailsAttr
Studio:
Name: *studioName
URL: *studioURL
FrontImage: *imageSel
# Last Updated April 2, 2024