100 lines
3.1 KiB
YAML
100 lines
3.1 KiB
YAML
# yaml-language-server: $schema=../validator/scraper.schema.json
|
|
|
|
# Display name of this scraper definition.
name: VirtualRealPorn
|
|
|
|
# Scene lookup by URL: pages on any of the domains below are scraped with
# the scrapeXPath action, using the sceneScraper definition.
sceneByURL:
  - action: scrapeXPath
    scraper: sceneScraper
    # The domain list is anchored as `urlSel` so that movieByURL can reuse
    # it through the `*urlSel` alias.
    url: &urlSel
      - virtualrealamateurporn.com
      - virtualrealgay.com
      - virtualrealjapan.com
      - virtualrealpassion.com
      - virtualrealporn.com
      - virtualrealtrans.com
|
|
|
|
# Scene lookup by fragment: builds the scene page URL from the file name and
# scrapes it with the sceneScraper definition.
sceneByFragment:
  action: scrapeXPath
  # url format: https://virtualrealporn.com/vr-porn-video/{title}
  # The title is part of the official filename for more recent scenes;
  # however, the title has its spaces replaced with '_' in the file name.
  queryURL: https://virtualrealporn.com/vr-porn-video/{filename}/
  queryURLReplace:
    # filename format:
    # - older scenes:
    #     VirtualRealPorn.com_-_{title}_-_{misc}.{ext}
    #       misc: could be vrtype, instructions for their player etc. Not relevant.
    # - new scenes:
    #     VirtualRealPorn_{title}_{res}_{vrtype}.{ext}
    #       res: oculus|\d+k
    #       vrtype: stuff we do not care about but could contain '_'
    filename:
      # Reduce the whole file name to just its {title} part.
      # Both alternatives sit inside one group so that '^' and '$' anchor
      # each of them (previously '^' applied only to the new-scene form and
      # '$' only to the old-scene form). The resolution token is '\d+k' to
      # agree with the format documented above, and the '.' of ".com" is
      # escaped so it only matches a literal dot.
      - regex: '(?i)^(?:VirtualRealPorn_(?P<title>.+)_(?:oculus|\d+k)_.+|VirtualRealPorn\.com_-_(?P<title>.+?)_-_.+)$'
        with: $title
      # The title has spaces replaced with '_' in the filename,
      # but we need '-' for the url.
      - regex: _
        with: "-"
  scraper: sceneScraper
|
|
|
|
# Movie lookup by URL: same domain list as sceneByURL (via the `urlSel`
# alias), scraped with the movieScraper definition.
movieByURL:
  - action: scrapeXPath
    scraper: movieScraper
    url: *urlSel
|
|
|
|
# XPath scraper definitions referenced by the `scraper:` keys of the
# lookup sections (sceneScraper and movieScraper).
xPathScrapers:
  # Field mappings used for scene pages.
  sceneScraper:
    scene:
      Title: &titleDef
        selector: '//h1[@class="titleVideo"]/text()'
        postProcess:
          - replace:
              # Drop a trailing "VR <words> video" suffix from the heading.
              - regex: 'VR [\s\w]+? video$'
                with: ''
      # Anchored as `dateAttr`; reused by movieScraper.
      Date: &dateAttr
        selector: '//div[@class="video-date"]/span'
        postProcess:
          - parseDate: 'Jan 2, 2006'
      # Anchored as `detailsSel`; reused by movieScraper as the Synopsis.
      # Matching paragraphs are joined with a blank line between them.
      Details: &detailsSel
        selector: '//div[@class="g-cols onlydesktop"]/p'
        concat: "\n\n"
      Tags:
        Name: '//div[@class="metaHolder"]//a/span/text()'
      Performers:
        Name:
          selector: '//h1[@class="titleVideo"]/following-sibling::p'
          postProcess:
            - replace:
                # Turn " & " separators into ", " so that a single
                # separator remains for the split below.
                - regex: '\s&\s'
                  with: ', '
          # NOTE(review): placed as a sibling of postProcess; the original
          # nesting was lost in the paste - confirm against the schema.
          split: ', '
      Studio:
        # Anchored as `studioName`; reused by movieScraper.
        Name: &studioName
          selector: '//meta[@property="og:site_name"]/@content'
          postProcess:
            - replace:
                # Remove ".com" from the site name.
                - regex: '\.com'
                  with: ''
            - map:
                VirtualRealAmateurPorn: VirtualRealAmateur
      # Anchored as `imageSel`; reused by movieScraper as the FrontImage.
      Image: &imageSel '//meta[@property="og:image"]/@content'
      URL: '//link[@rel="canonical"]/@href'
      # Disabled movie mapping, kept for reference:
      # Movies:
      #   Name:
      #     selector: //meta[@property="og:site_name"]/@content|//h1[@class="titleVideo"]
      #     concat: " - "
      #   Date: *dateAttr
      #   Synopsis: *detailsSel
      #   URL: //meta[@property="og:url"]/@content
  # Field mappings used for movie pages; most fields reuse the scene
  # definitions above through aliases.
  movieScraper:
    movie:
      Name:
        # Site name and page heading, joined with " - ".
        selector: '//meta[@property="og:site_name"]/@content|//h1[@class="titleVideo"]'
        concat: ' - '
      Date: *dateAttr
      Studio:
        Name: *studioName
      Synopsis: *detailsSel
      FrontImage: *imageSel
|
|
# Last Updated November 25, 2022
|