# yaml-language-server: $schema=../validator/scraper.schema.json
# Scraper definition for sexlikereal.com scenes. Scenes can be looked up either
# from a scene URL (sceneByURL) or from a filename that embeds the numeric
# scene code (sceneByFragment). Both paths share the same XPath scraper.
name: "SexLikeReal"

sceneByURL:
  - action: scrapeXPath
    url:
      - sexlikereal.com
    scraper: sceneScraper

sceneByFragment:
  action: scrapeXPath
  # url format: https://www.sexlikereal.com/scenes/{title}-{code}
  # However, the url:
  #   https://www.sexlikereal.com/{code}
  # will redirect to the full url, so that is what we will use for scraping
  queryURL: https://www.sexlikereal.com/{filename}
  queryURLReplace:
    # filename format:
    #   SLR_{studio:[^_]+}_{title:[^_]+}_{res:\d+p}_{code:\d+}_{vrtype}.{ext}
    # vrtype: stuff we do not care about but could contain '_'
    filename:
      # Reduce a matching filename to just the numeric code captured after
      # the resolution segment (e.g. "_2160p_").
      - regex: '(?i)^SLR_.+_\d+p_(\d+)_.*$'
        with: $1
      # If no id is found in the filename (it still has an extension),
      # clear the filename so that it doesn't leak into the query URL.
      - regex: '.*\.[^\.]+$'
        with: ""
  scraper: sceneScraper

xPathScrapers:
  sceneScraper:
    scene:
      Title:
        # The title is pulled out of the inline script that contains
        # "videoData:", then literal \u2019 / \u2013 escape sequences left in
        # the text are replaced with the characters they stand for.
        selector: //script[@type="text/javascript"][contains(.,"videoData:")]/text()
        postProcess:
          - replace:
              - regex: '.+videoData:\s{[^{]+title":"([^"]+)",.+'
                with: $1
              - regex: '\\u2019'
                with: "’"
              - regex: '\\u2013'
                with: "–"
      Date: //time/@datetime
      Details:
        selector: //div[@data-qa="scene-about-tab-text"]/text()[last()]
        postProcess:
          - replace:
              # Strip a leading "." and any whitespace that follows it.
              - regex: '^\.\s*'
                with: ""
      Tags:
        # Union of the video:tag meta entries and the scene spec list items.
        Name: //meta[@property="video:tag"]/@content|//ul[@data-qa="scene-specs-list"]/li/span/text()
      Performers:
        Name: //meta[@property="video:actor"]/@content
      Studio:
        Name:
          selector: //a[contains(@href,"/studios/")]/div[last()]/text()
          postProcess:
            # Rewrite studio names as they appear on the site (keys) to the
            # preferred display names (values).
            - map:
                DDFNetworkVR: "DDF Network VR"
                LethalHardcoreVR: "Lethal Hardcore VR"
                LustReality: "Lust Reality"
                POVcentralVR: "POV Central"
                RealHotVR: "Real Hot VR"
                SinsVR: "XSinsVR"
                VirtualXPorn: "Virtual X Porn"
                WankitnowVR: "Wank It Now VR"
      Image:
        selector: //meta[@property="og:image"]/@content
        postProcess:
          - replace:
              # Swap the "-app." variant of the og:image URL for "-desktop.".
              - regex: '-app\.'
                with: -desktop.
      # The canonical link is anchored so Code can reuse the same selector.
      URL: &sceneUrl //link[@rel="canonical"]/@href
      Code:
        selector: *sceneUrl
        postProcess:
          - replace:
              # Keep only the trailing numeric code of ".../{title}-{code}".
              - regex: '^(.+)-(\d+)/?$'
                with: $2
# Last Updated October 21, 2023