This commit is contained in:
Christoph Califice
2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions

View File

@@ -0,0 +1,143 @@
name: "HelixStudios"
sceneByURL:
- action: scrapeXPath
url:
- helixstudios.com/video/
scraper: sceneScraper
sceneByName:
action: scrapeXPath
queryURL: https://www.helixstudios.com/videos?q={}
scraper: sceneSearch
sceneByQueryFragment:
action: scrapeXPath
queryURL: "{url}"
scraper: sceneScraper
performerByURL:
- action: scrapeXPath
url:
- helixstudios.com/model/
scraper: performerScraper
performerByName:
action: scrapeXPath
queryURL: https://www.helixstudios.com/models/?q={}
scraper: performerSearch
xPathScrapers:
sceneSearch:
common:
$searchItem: //div[contains(@class, "browse-results-grid thumbnail-grid pure-g")]
$searchThumb: //div[contains(@class, "browse-results-grid thumbnail-grid pure-g")]//div[contains(@class, "grid-item-wrapper")]/a
scene:
Title: $searchThumb//h4/text()
URL:
selector: $searchItem//div[contains(@class, "grid-item-wrapper")]/a/@href
postProcess:
- replace:
- regex: ^
with: "https://helixstudios.com"
Image:
selector: $searchItem//div[contains(@class, "image-wrapper img-poster")]//img/@data-src
sceneScraper:
common:
$info: //div[@class="text"]
scene:
Title:
selector: //div[@class="video-info"]/span[1]/text()
Date:
selector: //div[@class="info-items"]/span[@class="info-item date"]/text()
postProcess:
- parseDate: January 2, 2006
Details:
selector: //div[contains(@class, "description-content")]/p[node()]
concat: "#LINEBREAK#"
postProcess:
- replace:
- regex: "___.*"
with:
- regex: "#LINEBREAK#"
with: "\n\n"
Tags:
Name: //div[@class="video-tags-wrapper"]/a/text()
Performers:
Name:
selector: //div[@class="video-cast"]//a/@title
URL: //link[@rel="canonical"][1]/@href
Image:
selector: //video/@poster
postProcess:
- replace:
- regex: 960w
with: 1500w
Director:
selector: //span[contains(@class, "info-item director")]/text()
Code:
selector: //*[@id="titleImage"]/@src
postProcess:
- replace:
- regex: ^.*\/\s*
with:
- replace:
- regex: \_1600.*$
with:
Studio:
Name:
fixed: Helix
performerSearch:
common:
$result: //div[@class="grid-item-wrapper"]/a[@class="thumbnail-link"]
performer:
Name: $result/div[@class="thumbnail model-thumbnail"]/div[@class="thumbnail-meta"]/h4/text()
URL:
selector: $result/@href
postProcess:
- replace:
- regex: ^
with: "https://helixstudios.com"
performerScraper:
performer:
Name: //div[contains(@class, "top")]/h1/text()
Gender:
fixed: Male
Height:
selector: //span[text()="Height"]/following-sibling::text()
postProcess:
- feetToCm: true
Measurements:
selector: //span[text()="Cock"]/following-sibling::text()
postProcess:
- replace:
- regex: ^
with: "0'"
- replace:
- regex: inch*
with: "''"
- feetToCm: true
Weight:
selector: //span[text()="Weight"]/following-sibling::text()
postProcess:
- replace:
- regex: lbs
with: ""
- lbToKg: true
HairColor:
selector: //span[text()="Hair"]/following-sibling::text()
EyeColor:
selector: //span[text()="Eyes"]/following-sibling::text()
URL: //link[@rel="canonical"][1]/@href
Image: //div[@class="model-headshot-image-wrapper"]/img/@src
Details:
selector: //div[@class="description"]//p[*]
concat: "\n\n"
postProcess:
- replace:
- regex: $
with: " "
# Last Updated January 14, 2024