This commit is contained in:
Christoph Califice
2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions

View File

@@ -0,0 +1,78 @@
# yaml-language-server: $schema=../validator/scraper.schema.json
# Scraper definition targeting sexlikereal.com scene pages.
name: "SexLikeReal"
# Scrape a scene from its page URL: any URL containing the host below is
# handed to the shared XPath scraper.
sceneByURL:
  - action: scrapeXPath
    scraper: sceneScraper
    url:
      - sexlikereal.com
# Scrape a scene by deriving its page URL from the local file name.
sceneByFragment:
  action: scrapeXPath
  # Canonical URL format: https://www.sexlikereal.com/scenes/{title}-{code}
  # However, the short URL:
  #   https://www.sexlikereal.com/{code}
  # redirects to the canonical URL, so that is what we use for scraping.
  queryURL: https://www.sexlikereal.com/{filename}
  queryURLReplace:
    # Expected filename format:
    #   SLR_{studio:[^_]+}_{title:[^_]+}_{res:\d+p}_{code:\d+}_{vrtype}.{ext}
    # vrtype: trailing data we do not care about; it may itself contain '_'.
    filename:
      # Reduce a matching filename to just the numeric scene code.
      - regex: '(?i)^SLR_.+_\d+p_(\d+)_.*$'
        with: $1
      # If no code was found, the value still has a file extension: blank it
      # so that the local filename is not leaked in the request URL.
      - regex: '.*\.[^\.]+$'
        with: ""
  scraper: sceneScraper
# XPath rules shared by the URL and fragment scrapers above.
xPathScrapers:
  sceneScraper:
    scene:
      # The title is pulled out of the inline "videoData:" JavaScript object,
      # so it arrives with JSON-style \uXXXX escapes still in text form.
      Title:
        selector: //script[@type="text/javascript"][contains(.,"videoData:")]/text()
        postProcess:
          - replace:
              # Keep only the value of the "title" field.
              - regex: '.+videoData:\s{[^{]+title":"([^"]+)",.+'
                with: $1
              # Escaped right single quotation mark -> ASCII apostrophe
              # (deleting it would turn "Don't" into "Dont").
              - regex: '\\u2019'
                with: "'"
              # Escaped en dash -> ASCII hyphen (deleting it would fuse the
              # words on either side).
              - regex: '\\u2013'
                with: "-"
      Date: //time/@datetime
      Details:
        # Only the last text node of the "about" tab is used.
        selector: //div[@data-qa="scene-about-tab-text"]/text()[last()]
        postProcess:
          - replace:
              # Strip a leading period and any whitespace after it.
              - regex: '^\.\s*'
                with: ""
      Tags:
        # Union of the video:tag meta entries and the scene spec list items.
        Name: //meta[@property="video:tag"]/@content|//ul[@data-qa="scene-specs-list"]/li/span/text()
      Performers:
        Name: //meta[@property="video:actor"]/@content
      Studio:
        Name:
          selector: //a[contains(@href,"/studios/")]/div[last()]/text()
          postProcess:
            # Rewrite specific studio names as scraped to the preferred
            # spelling; names not listed here pass through unchanged.
            - map:
                DDFNetworkVR: "DDF Network VR"
                LethalHardcoreVR: "Lethal Hardcore VR"
                LustReality: "Lust Reality"
                POVcentralVR: "POV Central"
                RealHotVR: "Real Hot VR"
                SinsVR: "XSinsVR"
                VirtualXPorn: "Virtual X Porn"
                WankitnowVR: "Wank It Now VR"
      Image:
        selector: //meta[@property="og:image"]/@content
        postProcess:
          - replace:
              # Swap the "-app." image variant in the og:image URL for the
              # "-desktop." one.
              - regex: '-app\.'
                with: "-desktop."
      # The canonical link is anchored so that Code below can reuse it.
      URL: &sceneUrl //link[@rel="canonical"]/@href
      Code:
        selector: *sceneUrl
        postProcess:
          - replace:
              # The scene code is the trailing number of the canonical URL.
              - regex: '^(.+)-(\d+)/?$'
                with: $2
# Last Updated October 21, 2023