This commit is contained in:
Christoph Califice
2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions

View File

@@ -0,0 +1,99 @@
# yaml-language-server: $schema=../validator/scraper.schema.json
name: "VirtualRealPorn"
sceneByURL:
- action: scrapeXPath
url: &urlSel
- virtualrealamateurporn.com
- virtualrealgay.com
- virtualrealjapan.com
- virtualrealpassion.com
- virtualrealporn.com
- virtualrealtrans.com
scraper: sceneScraper
sceneByFragment:
action: scrapeXPath
# url format: https://virtualrealporn.com/vr-porn-video/{title}
# title is part of the official filename for more recent scenes
# however, title will have spaces replaced with '_' in the file name
queryURL: https://virtualrealporn.com/vr-porn-video/{filename}/
queryURLReplace:
# filename format:
# - older scenes
# VirtualRealPorn.com_-_{title}_-_{misc}.{ext}
# misc: could be vrtype, instructios for their player etc. Not relevant.
# - new scenes:
# VirtualRealPorn_{title}_{res}_{vrtype}.{ext}
# res: oculus|\d+k
# vrtype: stuff we do not care about but could contain '_'
filename:
- regex: (?i)^(?:VirtualRealPorn_(?P<title>.+)_(?:oculus|\dk)_.+)|(?:VirtualRealPorn.com_-_(?P<title>.+?)_-_.+)$
with: $title
# title has spaces replaced with '_' in the filename
# but we need '-' for the url
- regex: _
with: "-"
scraper: sceneScraper
movieByURL:
- action: scrapeXPath
url: *urlSel
scraper: movieScraper
xPathScrapers:
sceneScraper:
scene:
Title: &titleDef
selector: //h1[@class="titleVideo"]/text()
postProcess:
- replace:
- regex: VR [\s\w]+? video$
with: ""
Date: &dateAttr
selector: //div[@class="video-date"]/span
postProcess:
- parseDate: Jan 2, 2006
Details: &detailsSel
selector: //div[@class="g-cols onlydesktop"]/p
concat: "\n\n"
Tags:
Name: //div[@class="metaHolder"]//a/span/text()
Performers:
Name:
selector: //h1[@class="titleVideo"]/following-sibling::p
postProcess:
- replace:
- regex: \s&\s
with: ", "
split: ", "
Studio:
Name: &studioName
selector: //meta[@property="og:site_name"]/@content
postProcess:
- replace:
- regex: \.com
with: ""
- map:
VirtualRealAmateurPorn: "VirtualRealAmateur"
Image: &imageSel //meta[@property="og:image"]/@content
URL: //link[@rel="canonical"]/@href
# Movies:
# Name:
# selector: //meta[@property="og:site_name"]/@content|//h1[@class="titleVideo"]
# concat: " - "
# Date: *dateAttr
# Synopsis: *detailsSel
# URL: //meta[@property="og:url"]/@content
movieScraper:
movie:
Name:
selector: //meta[@property="og:site_name"]/@content|//h1[@class="titleVideo"]
concat: " - "
Date: *dateAttr
Studio:
Name: *studioName
Synopsis: *detailsSel
FrontImage: *imageSel
# Last Updated November 25, 2022