# Listing metadata: 54 lines, 1.6 KiB, YAML
# Name of this scraper definition.
name: Heyzo
# Scene lookup by URL: scene URLs matching an entry under `url` are scraped
# with the XPath scraper named by `scraper`.
sceneByURL:
  - action: scrapeXPath
    url:
      - en.heyzo.com/moviepages
    scraper: sceneScraper
# Scene lookup by fragment: the movie page URL is built from the scene's
# filename and then scraped with the XPath scraper named by `scraper`.
sceneByFragment:
  action: scrapeXPath
  # Constructs the movie URL from the filename, provided that the filename
  # includes the movie id.
  queryURL: https://en.heyzo.com/moviepages/{filename}
  queryURLReplace:
    filename:
      # Heyzo uses a 4-digit number for ids; here we take any series of
      # digits just to be safe. Capture group 2 is the id, which must be
      # followed by a non-alphanumeric character.
      - regex: '(.*[^a-zA-Z\d])*(\d+)[^a-zA-Z\d].*'
        with: $2/index.html
  scraper: sceneScraper
# XPath scraper definitions, referenced by name from the `scraper` keys of
# the lookup sections.
xPathScrapers:
  sceneScraper:
    # Shared XPath fragments; selectors below reference them as $table and
    # $movieObject.
    common:
      # Rows of the movie information table.
      $table: //div[@class="info-bg"]/table/tbody/tr
      # Text of the inline script that mentions "movie_obj".
      $movieObject: //script[contains(.,"movie_obj")]/text()
    scene:
      Date:
        selector: $table/td[contains(.,"Released")]/following-sibling::td/text()
        postProcess:
          # Quoted so the layout stays a string rather than being read as a
          # date by the YAML parser.
          - parseDate: '2006-01-02'
      Performers:
        Name: $table/td[contains(.,"Actress")]/following-sibling::td/a/text()
      Image:
        selector: $movieObject
        postProcess:
          - replace:
              # Keep only the value of the "thumbnail" field and prefix it
              # with the scheme.
              - regex: '.*thumbnail"\s*:\s*"([^"]+).*'
                with: https:$1
      Title:
        selector: $movieObject
        postProcess:
          - replace:
              # Keep only the value of the "name" field.
              - regex: '.*name"\s*:\s*"([^"]+).*'
                with: $1
      Code:
        selector: //script[contains(.,"movieId")]/text()
        postProcess:
          - replace:
              # Keep only the quoted value assigned to movieId.
              - regex: '.*movieId\s*=\s*"([^"]+).*'
                with: $1
      Tags:
        # Links from the "Type", "Sex Styles" and "Theme" rows.
        Name: $table/td[contains(.,"Type") or contains(.,"Sex Styles") or contains(.,"Theme") ]/following-sibling::td/a/text()
      Studio:
        Name:
          fixed: Heyzo
# Last Updated January 16, 2024