158 lines
5.4 KiB
YAML
158 lines
5.4 KiB
YAML
# yaml-language-server: $schema=../validator/scraper.schema.json
|
|
|
|
# Name of this scraper definition.
name: "BaDoink"
|
|
|
|
sceneByURL:
  - scraper: sceneScraper
    action: scrapeXPath
    # Domains this entry applies to. The &urls anchor lets the performer and
    # movie URL entries reuse the exact same list.
    url: &urls
      - 18vr.com
      - babevr.com
      - badoinkvr.com
      - kinkvr.com
      - realvr.com
      - vrcosplayx.com
    queryURL: '{url}'
    queryURLReplace:
      url:
        # Drop the members-area path segment from the URL, since the scraper
        # would not attempt to log in.
        - regex: '/members/'
          with: '/'
|
|
|
|
sceneByFragment:
  action: scrapeXPath
  scraper: sceneScraper
  # Scene URL format: https://{studio}.com/vrpornvideo/{title}-{code}
  # The URL without the code is what is used for scraping, since it forwards
  # to the correct one:
  #   https://{studio}.com/vrpornvideo/{title}
  # Both {studio} and {title} are part of the official file name.
  queryURL: "{filename}"
  queryURLReplace:
    # Expected file name format:
    #   {studio}_{title}_{res}_{vrtype}.{ext}
    #   res:    oculus|\d+k
    #   vrtype: not needed here, but it may itself contain '_'
    filename:
      # The resolution token accepts one or more digits before "k" (\d+k), as
      # documented above, so multi-digit resolutions are recognised as well.
      - regex: '(?i)^(18vr|babevr|badoinkvr|kinkvr|realvr|vrcosplayx)_(.+)_(?:oculus|\d+k)_.+$'
        with: https://$1.com/vrpornvideo/$2
|
|
|
|
performerByURL:
  # Performer pages on any of the domains in the shared *urls list are
  # handled by performerScraper.
  - scraper: performerScraper
    action: scrapeXPath
    url: *urls
|
|
movieByURL:
  # Movie lookups on any of the domains in the shared *urls list are
  # handled by movieScraper.
  - scraper: movieScraper
    action: scrapeXPath
    url: *urls
|
|
# Uncomment this section to enable Performer Search on this scraper
|
|
# performerByName:
|
|
# action: scrapeXPath
|
|
# queryURL: https://kinkvr.com/bdsm-performers/search/{}?all=1
|
|
# scraper: performerSearch
|
|
xPathScrapers:
  # Scene page scraper, referenced by sceneByURL and sceneByFragment.
  sceneScraper:
    common:
      # Details panel of the video page; anchored so movieScraper can reuse it.
      $details: &detailsAttr //div[@class="video-rating-and-details"]
    scene:
      Title: //h1[contains(@class, "video-title")]/text()
      Date: &dateSel
        selector: $details//p[@class="video-upload-date"]/text()
        postProcess:
          # Remove the "Uploaded: " label, then parse what is left as a date.
          - replace:
              - regex: Uploaded:\s
                with: ""
          - parseDate: January 02, 2006
      Details: &detailsSel $details//p[@class="video-description"]/text()
      Tags:
        Name: $details//p[@class="video-tags"]//a/text()
      Performers:
        Name: $details//p[@class="video-actors"]/a/text()
      Studio:
        Name: &studioName //meta[@name="dl8-customization-brand-name"]/@content
      Image: &imageSel
        selector: //img[@class="video-image"]/@src
        postProcess:
          # Strip the query string from the image URL.
          - replace:
              - regex: \?.+
                with: ""
      URL: &sceneUrl //link[@rel="canonical"]/@href
      Code:
        selector: *sceneUrl
        postProcess:
          - replace:
              # Keep only the numeric code that ends the canonical URL.
              - regex: '^(.+)-(\d+)/?$'
                with: $2
              # If no code is found in the URL, the value still contains a
              # non-digit character: return an empty string instead.
              - regex: '^.*[^\d].*$'
                with: ""
  # Uncomment this section to use Performer Search on this scraper
  # performerSearch:
  #   common:
  #     $girlA: //div[@class="girl-card-info"]/a
  #   performer:
  #     Name: $girlA/span
  #     URL:
  #       selector: $girlA/@href
  #       postProcess:
  #         - replace:
  #             - regex: .*badoinkvr\.com/girl/redirect/([^\?]+).+
  #               with: https://badoinkvr.com/vr-pornstar/$1
  #             - regex: .*vrcosplayx\.com/girl/redirect/([^\?]+).+
  #               with: https://vrcosplayx.com/cosplaygirl/$1
  #             - regex: .*18vr\.com/girl/redirect/([^\?]+).+
  #               with: https://18vr.com/vrgirl/$1
  #             - regex: .*babevr\.com/girl/redirect/([^\?]+).+
  #               with: https://babevr.com/vrbabe/$1
  #             - regex: .*realvr\.com/girl/redirect/([^\?]+).+
  #               with: https://realvr.com/pornstar/$1
  #             - regex: ^/
  #               with: https://kinkvr.com/
  # Performer page scraper, referenced by performerByURL.
  performerScraper:
    common:
      # List element that holds the per-performer stat rows.
      $stats: //ul[@id='girlOptionDetails']
    performer:
      Name: //div[@class='girl-details-info']/h1/text()
      Gender:
        fixed: female
      URL: //link[@rel='canonical']/@href
      Twitter: //a[contains(@class,'social-media') and contains (@href,'twitter')]/@href
      Instagram: //a[contains(@class,'social-media') and contains (@href,'instagram')]/@href
      Ethnicity:
        selector: $stats/li[span[contains(.,'Ethnicity')]]/span[contains(@class,'value')]/text()
        postProcess:
          # Translate the site's ethnicity labels (left) to the values on the right.
          - map:
              Caucasian: white
              Asian: asian
              Black: black
              Latin: hispanic
      Country: $stats/li[span[contains(.,'Country')]]/span[contains(@class,'value')]/text()
      EyeColor: $stats/li[span[contains(.,'Eyes')]]/span[contains(@class,'value')]/text()
      Height:
        selector: $stats/li[span[contains(.,'Height')]]/span[contains(@class,'value')]/text()
        postProcess:
          - feetToCm: true
      Measurements: $stats/li[span[contains(.,'Measure')]]/span[contains(@class,'value')]/text()
      Aliases: $stats/li[span[contains(.,'Aka')]]/span[contains(@class,'value')]/text()
      Image: //img[@id='girlImage']/@src
  # Movie scraper, referenced by movieByURL; reuses the selectors anchored in
  # sceneScraper.
  movieScraper:
    common:
      $details: *detailsAttr
    movie:
      Name:
        # Brand name and page heading, joined with " - ".
        selector: //meta[@name="dl8-customization-brand-name"]/@content|$details/h1/text()
        concat: " - "
      Duration:
        selector: $details//p[@class="video-duration"]/text()
        postProcess:
          # Remove the "Duration: " label and turn the " min" suffix into ":00".
          - replace:
              - regex: Duration:\s
                with: ""
              - regex: \smin
                with: ":00"
      Date: *dateSel
      Synopsis: *detailsSel
      Studio:
        Name: *studioName
      FrontImage: *imageSel
|
|
# Last Updated November 25, 2022
|