# XPath-based scraper definition covering povr.com and the brasilvr.com,
# milfvr.com, tranzvr.com and wankzvr.com sites (see the url lists below).
name: POVR
# Scene routing: each entry pairs a list of site URLs with the name of the
# scraper (a key under xPathScrapers) used for scene pages on those sites.
sceneByURL:
  # povr.com has its own set of selectors (sceneScraper).
  - action: scrapeXPath
    url:
      - povr.com
    scraper: sceneScraper
  # These four sites share one set of selectors (sceneScraperPremium).
  - action: scrapeXPath
    url:
      - brasilvr.com
      - milfvr.com
      - tranzvr.com
      - wankzvr.com
    scraper: sceneScraperPremium
# Movie routing: only wankzvr.com is listed, handled by movieScraper
# (a key under xPathScrapers).
movieByURL:
  - action: scrapeXPath
    url:
      - wankzvr.com
    scraper: movieScraper
# Scraper definitions referenced by the `scraper:` keys of the URL routing
# entries. YAML anchors (&name) declared in sceneScraperPremium are reused via
# aliases (*name) by sceneScraper and movieScraper, so sceneScraperPremium must
# stay first in this mapping.
xPathScrapers:
  # Scenes on brasilvr.com, milfvr.com, tranzvr.com and wankzvr.com.
  sceneScraperPremium:
    common:
      # Reusable XPath fragments, referenced as $info / $url in selectors below.
      $info: &infoSel //div[@class="detail"]
      $url: &urlSel //link[@rel="canonical"]/@href
    scene:
      Title: &titleSel //div[@class="detail__header detail__header-lg"]/h1
      Date: &dateAttr
        selector: $info//span[@class="detail__date"]/text()
        postProcess:
          - parseDate: 2 January, 2006
      Details: &detailsAttr
        # Union of two possible description nodes; matches are joined with a space.
        selector: //div[@class="detail__txt detail__txt-show_lg"]/text()|//span[@class="more__body"]/text()
        concat: " "
      Tags:
        Name: $info//div[@class="tag-list__body"]//a/text()
      Performers:
        Name: //div[@class="detail__inf detail__inf-align_right"]/div[@class="detail__models"]/a/text()
      Image: &imageAttr
        selector: //meta[@property="og:image"]/@content|//div[@class="photo-strip__body"]/div[2]/@data-src
        postProcess:
          - replace:
              - regex: medium.jpg
                with: large.jpg
              # TranzVR defaults to smaller covers, but we can grab a bigger one
              - regex: 472/cover.jpg
                with: 680/cover.jpg
              # All of these domains give 403 errors when saving the scraped image
              # but povr.com has the same images and is totally cool with our scraping
              - regex: cdns-i.wankzvr.com
                with: images.povr.com/wvr
              - regex: images.tranzvr.com
                with: images.povr.com/tvr
              - regex: cdns-i.milfvr.com
                with: images.povr.com/mvr
              - regex: cdns-i.brasilvr.com
                with: images.povr.com
      Studio: &studioAttr
        Name:
          # Derive the studio from the canonical URL: reduce it to the host
          # (without an optional "www" prefix), then map host -> display name.
          selector: *urlSel
          postProcess:
            - replace:
                - regex: ^.*//(?:www.)?([^/]*).*$
                  with: $1
            - map:
                brasilvr.com: BrasilVR
                milfvr.com: MilfVR
                tranzvr.com: TranzVR
                wankzvr.com: WankzVR
      Code: &codeAttr
        # Keep only the digits that follow the last hyphen at the end of the
        # canonical URL.
        selector: *urlSel
        postProcess:
          - replace:
              - regex: ^.*-(\d+)$
                with: $1
  # Scenes on povr.com; Image and Code reuse the definitions anchored above.
  sceneScraper:
    scene:
      Title:
        selector: //h1[contains(@class, "heading-title")]/text()
      Date:
        selector: //p[contains(@class, 'player__date')]
        postProcess:
          # Strip everything before the trailing "<day> <month>, <year>" text
          # so that parseDate receives only the date.
          - replace:
              - regex: .*\s(\d+\s[a-zA-Z]+,\s\d+)$
                with: $1
          - parseDate: 2 January, 2006
      Performers:
        Name: //li[span[contains(text(), "Pornstars:")]]/following-sibling::li/a/text()
      Tags:
        Name: //li[span[contains(text(), "Tags:")]]/following-sibling::li/a/text()
      Details:
        selector: //div[contains(@class, "player__description")]/p/text()
      Image: *imageAttr
      Studio:
        Name:
          selector: //span[contains(text(), "Studio:")]/following-sibling::a/text()
      Code: *codeAttr
  # Movies on wankzvr.com; most fields alias the sceneScraperPremium anchors.
  movieScraper:
    common:
      $info: *infoSel
    movie:
      Name:
        selector: *titleSel
        postProcess:
          # Prefix the scraped title with the studio name.
          - replace:
              - regex: ^
                with: "WankzVR - "
      Duration:
        selector: $info//span[@class="time"]/text()
        postProcess:
          # Turn a trailing " min" into ":00" (e.g. "45 min" -> "45:00").
          - replace:
              - regex: \smin
                with: ":00"
      Date: *dateAttr
      Studio: *studioAttr
      Synopsis: *detailsAttr
      FrontImage: *imageAttr
# Last Updated June 28, 2023