79 lines
2.5 KiB
YAML
79 lines
2.5 KiB
YAML
# yaml-language-server: $schema=../validator/scraper.schema.json
|
||
|
||
name: "SexLikeReal"
|
||
sceneByURL:
  # Scene pages whose URL matches the entry listed under `url` are
  # handled by the XPath scraper named in `scraper`.
  - action: scrapeXPath
    url:
      - sexlikereal.com
    scraper: sceneScraper
|
||
|
||
sceneByFragment:
  # Builds a scene page URL from the file name and scrapes it with the
  # XPath scraper named in `scraper`.
  action: scrapeXPath
  # url format: https://www.sexlikereal.com/scenes/{title}-{code}
  # However, the url:
  #   https://www.sexlikereal.com/{code}
  # will redirect to the full url, so that is what we use for scraping.
  queryURL: https://www.sexlikereal.com/{filename}
  queryURLReplace:
    # filename format:
    #   SLR_{studio:[^_]+}_{title:[^_]+}_{res:\d+p}_{code:\d+}_{vrtype}.{ext}
    #     vrtype: stuff we do not care about but could contain '_'
    filename:
      # Reduce a recognised file name to its numeric scene code.
      - regex: '(?i)^SLR_.+_\d+p_(\d+)_.*$'
        with: $1
      # If no id was found, the value still ends in ".{ext}"; clear it
      # so that the filename doesn't leak into the query URL.
      - regex: '.*\.[^\.]+$'
        with: ""
  scraper: sceneScraper
|
||
|
||
xPathScrapers:
  sceneScraper:
    scene:
      # The title is read from the inline script that contains
      # "videoData:". The first rule keeps only the captured title
      # value; the next two turn the literal escape sequences \u2019
      # and \u2013 into the characters they stand for.
      Title:
        selector: //script[@type="text/javascript"][contains(.,"videoData:")]/text()
        postProcess:
          - replace:
              - regex: '.+videoData:\s{[^{]+title":"([^"]+)",.+'
                with: $1
              - regex: '\\u2019'
                with: "’"
              - regex: '\\u2013'
                with: "–"
      Date: //time/@datetime
      # Last text node of the "about" tab, with any leading "." and
      # following whitespace stripped.
      Details:
        selector: //div[@data-qa="scene-about-tab-text"]/text()[last()]
        postProcess:
          - replace:
              - regex: '^\.\s*'
                with: ""
      # Tags come from the video:tag meta elements and from the
      # scene-specs list items.
      Tags:
        Name: //meta[@property="video:tag"]/@content|//ul[@data-qa="scene-specs-list"]/li/span/text()
      Performers:
        Name: //meta[@property="video:actor"]/@content
      Studio:
        Name:
          selector: //a[contains(@href,"/studios/")]/div[last()]/text()
          postProcess:
            # Rewrite the studio names listed here to the names on the
            # right-hand side.
            - map:
                DDFNetworkVR: "DDF Network VR"
                LethalHardcoreVR: "Lethal Hardcore VR"
                LustReality: "Lust Reality"
                POVcentralVR: "POV Central"
                RealHotVR: "Real Hot VR"
                SinsVR: "XSinsVR"
                VirtualXPorn: "Virtual X Porn"
                WankitnowVR: "Wank It Now VR"
      # og:image URL with "-app." swapped for "-desktop.".
      Image:
        selector: //meta[@property="og:image"]/@content
        postProcess:
          - replace:
              - regex: '-app\.'
                with: -desktop.
      # The canonical link is anchored so that Code can reuse the same
      # selector.
      URL: &sceneUrl //link[@rel="canonical"]/@href
      # Keep only the digits that follow the last "-" of the canonical
      # URL (an optional trailing "/" is ignored).
      Code:
        selector: *sceneUrl
        postProcess:
          - replace:
              - regex: '^(.+)-(\d+)/?$'
                with: $2
|
||
# Last Updated October 21, 2023
|