This commit is contained in:
Christoph Califice
2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions

View File

@@ -0,0 +1,53 @@
# Scraper definition for en.heyzo.com.
name: Heyzo

# Scene pages whose URL matches an entry under `url` are handled by the
# `sceneScraper` XPath scraper.
sceneByURL:
  - action: scrapeXPath
    scraper: sceneScraper
    url:
      - 'en.heyzo.com/moviepages'
sceneByFragment:
  action: scrapeXPath
  scraper: sceneScraper
  # The movie page URL is built from the file name, so this only works when
  # the file name contains the movie id.
  queryURL: 'https://en.heyzo.com/moviepages/{filename}'
  queryURLReplace:
    filename:
      # Heyzo ids are 4-digit numbers; to be safe, any run of digits that is
      # followed by a non-alphanumeric character is captured as the id.
      - regex: '(.*[^a-zA-Z\d])*(\d+)[^a-zA-Z\d].*'
        with: '$2/index.html'
xPathScrapers:
  sceneScraper:
    # Shared XPath fragments; the selectors below refer to them by name.
    common:
      # Rows of the info table.
      $table: '//div[@class="info-bg"]/table/tbody/tr'
      # Text of the inline script that mentions `movie_obj`.
      $movieObject: '//script[contains(.,"movie_obj")]/text()'
    scene:
      Date:
        selector: '$table/td[contains(.,"Released")]/following-sibling::td/text()'
        postProcess:
          # Quoted on purpose: an unquoted 2006-01-02 is date-shaped and may be
          # resolved as a timestamp by YAML 1.1 loaders instead of a string.
          - parseDate: '2006-01-02'
      Performers:
        Name: '$table/td[contains(.,"Actress")]/following-sibling::td/a/text()'
      Image:
        selector: $movieObject
        postProcess:
          - replace:
              # Captures the quoted value after `thumbnail":` and prefixes it
              # with `https:` (presumably the value is protocol-relative;
              # confirm against the live page).
              - regex: '.*thumbnail"\s*:\s*"([^"]+).*'
                with: 'https:$1'
      Title:
        selector: $movieObject
        postProcess:
          - replace:
              # Captures the quoted value after `name":`.
              - regex: '.*name"\s*:\s*"([^"]+).*'
                with: '$1'
      Code:
        selector: '//script[contains(.,"movieId")]/text()'
        postProcess:
          - replace:
              # Captures the quoted value assigned to `movieId`.
              - regex: '.*movieId\s*=\s*"([^"]+).*'
                with: '$1'
      Tags:
        # Links from the "Type", "Sex Styles" and "Theme" rows of the table.
        Name: '$table/td[contains(.,"Type") or contains(.,"Sex Styles") or contains(.,"Theme") ]/following-sibling::td/a/text()'
      Studio:
        Name:
          fixed: Heyzo
# Last Updated January 16, 2024