# Scraper definition for the Heyzo studio site.
name: Heyzo
# Scrape-by-URL entry: scene URLs matching the pattern listed under `url`
# are processed with the XPath scraper named `sceneScraper`.
sceneByURL:
  - scraper: sceneScraper
    action: scrapeXPath
    url:
      - 'en.heyzo.com/moviepages'
# Scrape-by-fragment entry: the movie page URL is derived from the scene's
# filename, so this only works when the filename contains the movie id.
sceneByFragment:
  scraper: sceneScraper
  action: scrapeXPath
  queryURL: 'https://en.heyzo.com/moviepages/{filename}'
  queryURLReplace:
    filename:
      # Heyzo ids are 4-digit numbers; to be safe, any run of digits that is
      # followed by a non-alphanumeric character is accepted. The second
      # capture group (the digits) becomes the path segment of the movie page.
      - with: '$2/index.html'
        regex: '(.*[^a-zA-Z\d])*(\d+)[^a-zA-Z\d].*'

xPathScrapers:
  # XPath scraper referenced by both sceneByURL and sceneByFragment.
  sceneScraper:
    # Reusable selector fragments; the scene selectors below refer to them
    # by their $name.
    common:
      # Rows of the table inside the "info-bg" div of the movie page.
      $table: //div[@class="info-bg"]/table/tbody/tr
      # Text of the inline script that mentions "movie_obj".
      $movieObject: //script[contains(.,"movie_obj")]/text()
    scene:
      Date:
        # Cell following the one labelled "Released".
        selector: $table/td[contains(.,"Released")]/following-sibling::td/text()
        postProcess:
          # Date layout string. Quoted on purpose: as a plain scalar it looks
          # like an ISO date, and YAML loaders that resolve timestamps would
          # turn it into a date object instead of the layout string.
          - parseDate: '2006-01-02'
      Performers:
        # Link texts in the cell following the one labelled "Actress".
        Name: $table/td[contains(.,"Actress")]/following-sibling::td/a/text()
      Image:
        selector: $movieObject
        postProcess:
          - replace:
              # Keep only the quoted value that follows `thumbnail":` and
              # prefix it with `https:`.
              # NOTE(review): presumably the value is a protocol-relative
              # URL (starting with `//`) - confirm against a live page.
              - regex: '.*thumbnail"\s*:\s*"([^"]+).*'
                with: 'https:$1'
      Title:
        selector: $movieObject
        postProcess:
          - replace:
              # Keep only the quoted value that follows `name":`.
              - regex: '.*name"\s*:\s*"([^"]+).*'
                with: '$1'
      Code:
        # Taken from the inline script that mentions "movieId".
        selector: //script[contains(.,"movieId")]/text()
        postProcess:
          - replace:
              # Keep only the quoted value assigned to `movieId`.
              - regex: '.*movieId\s*=\s*"([^"]+).*'
                with: '$1'
      Tags:
        # Link texts from the rows labelled "Type", "Sex Styles" or "Theme".
        Name: $table/td[contains(.,"Type") or contains(.,"Sex Styles") or contains(.,"Theme") ]/following-sibling::td/a/text()
      Studio:
        Name:
          # Every scene from this scraper is attributed to the same studio.
          fixed: Heyzo
# Last Updated January 16, 2024