name: AdultEmpire

# Scene pages, routed by URL fragment. The `/clip` patterns are listed ahead of
# the bare-domain patterns.
# NOTE(review): presumably the first matching entry wins, so keep this order — confirm.
sceneByURL:
  - action: scrapeXPath
    scraper: sceneScraperSingle
    url:
      - "adultdvdempire.com/clip"
      - "adultempire.com/clip"
  - action: scrapeXPath
    scraper: sceneScraper
    url:
      - "adultdvdempire.com"
      - "adultempire.com"
# Movie pages on either domain are scraped with movieScraper.
movieByURL:
  - action: scrapeXPath
    scraper: movieScraper
    url:
      - "adultdvdempire.com"
      - "adultempire.com"
    
# Performer search by name, parsed with performerSearch.
# NOTE(review): `{}` is presumably substituted with the search term — confirm.
performerByName:
  action: scrapeXPath
  scraper: performerSearch
  queryURL: "https://www.adultempire.com/performer/search?q={}"

# Performer pages on either domain are scraped with performerScraper.
performerByURL:
  - action: scrapeXPath
    scraper: performerScraper
    url:
      - "adultempire.com"
      - "adultdvdempire.com"

# Scene search by name, parsed with sceneSearch.
sceneByName:
  action: scrapeXPath
  scraper: sceneSearch
  # Queries the `/dvd/search` endpoint; replace `dvd` with `vod` in the path if needed.
  queryURL: "https://www.adultdvdempire.com/dvd/search?q={}"
# Scrapes a scene from a query fragment using sceneScraper.
# NOTE(review): `{url}` is presumably filled from the fragment's URL field — confirm.
sceneByQueryFragment:
  action: scrapeXPath
  scraper: sceneScraper
  queryURL: "{url}"

xPathScrapers:
  performerSearch:
    common:
      # Anchor elements inside the #performerlist container; Name and URL are
      # read from attributes of these anchors.
      $performerRoot: //div[@id="performerlist"]//a
    performer:
      Name: $performerRoot/@label
      URL:
        selector: $performerRoot/@href
        postProcess:
          # `^` matches the start of the string, so this prefixes the href
          # with the site origin.
          - replace:
              - regex: ^
                with: "https://www.adultempire.com"

  sceneSearch:
    scene:
      # Title is assembled from the `href` and `title` attributes of each
      # result link: the values are joined with "|", every
      # `/<digits>/<rest>|<title>` pair is rewritten to `<title> (<digits>)`,
      # and the string is split on "|" again.
      # NOTE(review): assumes the engine applies concat, then postProcess,
      # then split — confirm.
      Title:
        selector: //div[@class="item-title"]/a/@*[local-name()="href" or local-name()="title"]
        concat: "|"
        split: "|"
        postProcess:
          - replace:
              - regex: '/(\d+)/[^|]+\|([^|]+)'
                with: "$2 ($1)"
      URL:
        selector: '//div[@id="content"]//div[@class="item-title"]/a/@href'
        postProcess:
          # Prefix the href with the site origin.
          - replace:
              - regex: ^
                with: "https://www.adultdvdempire.com"
      Image:
        selector: '//a[@class="boxcover"]/img/@data-src'

  movieScraper:
    movie:
      Name: //h1/text()
      URL: "//meta[@name='og:url']/@content"
      Director: '//a[@label="Director"]/text()'
      Studio:
        Name: '//a[@label="Studio"]/text()'
      Date:
        selector: '//small[contains(text(), "Released")]/following-sibling::text()'
        postProcess:
          - parseDate: "Jan 02 2006"
      Duration:
        selector: '//small[contains(text(), "Length")]/following-sibling::text()'
        postProcess:
          # Two replacements applied in order: " hrs. " becomes ":" and
          # " mins." becomes ":00", e.g. "2 hrs. 30 mins." -> "2:30:00".
          - replace:
              - regex: " hrs. "
                with: ":"
              - regex: " mins."
                with: ":00"
      Synopsis:
        # All text nodes under the synopsis container, joined with a space.
        selector: '//div[contains(@class,"synopsis-content")]//text()'
        concat: " "
      FrontImage: '//a[@id="front-cover"]/@data-href'
      BackImage: '//a[@id="back-cover"]/@href'
      # Rating is not yet implemented in the UX
      # Rating:
      #   selector: //span[@class='rating-stars-avg']/text()
      #   postProcess:
      #     - replace:
      #       - regex: (\d).+
      #         with: $1
  # Scraper for single-clip pages (routed from the `/clip` URL patterns).
  sceneScraperSingle:
    common:
      $header: //div[@class="clip-page__detail__title text-display-primary"]
      # Value of the first `data-tid` attribute in the document; used by the
      # Image selector below.
      $clipId: (//*[@data-tid]/@data-tid)[1]
    scene:
      Title: $header/h1
      Studio:
        Name: //div[contains(text(), "by")]/a
      Movies:
        Name: //div[contains(text(), "from")]/a
      Date:
        selector: //strong[contains(text(), "Released")]/following-sibling::text()
        postProcess:
          - parseDate: Jan 02 2006
      Tags:
        Name: //strong[contains(text(), "Attributes")]/following-sibling::a/text()
      Performers:
        Name: //strong[contains(text(), "Starring")]/following-sibling::a/text()
        URL:
          selector: //strong[contains(text(), "Starring")]/following-sibling::a/@href
          postProcess:
            # Prefix the href with the site origin.
            - replace:
                - regex: ^
                  with: "https://www.adultdvdempire.com"
      Image:
        # Union of the data-tid value and the front-cover image src, joined
        # into a single string. The regex captures the first run of digits
        # ($1) and the final path segment up to its last digit ($2), then
        # builds the backdrop URL from them.
        # NOTE(review): `%5f` is a URL-encoded underscore; a literal `_` after
        # `$2` would presumably be read as part of the group name — confirm.
        selector: $clipId | //a[@id="front-cover"]/img/@src
        concat: __SEPARATOR__
        postProcess:
          - replace:
              - regex: (\d+).*/([^/]*\d+)[^/\d]*$
                with: https://imgs1cdn.adultempire.com/backdrop/6000/$2%5f$1/scene-1.jpg
  # Treats a movie page as a single scene; the movie entry reuses the page's
  # own title and canonical link.
  sceneScraper:
    scene:
      Title: //h1/text()
      URL: "//meta[@name='og:url']/@content"
      Date:
        selector: '//small[contains(text(), "Released")]/following-sibling::text()'
        postProcess:
          - parseDate: "Jan 02 2006"
      Details:
        # All text nodes under the synopsis container, joined with a space.
        selector: '//div[contains(@class,"synopsis-content")]//text()'
        concat: " "
      Director: '//a[@label="Director"]/text()'
      Image: '//a[@id="front-cover"]/@data-href'
      Studio:
        Name: '//a[@label="Studio"]/text()'
      Movies:
        Name: //h1/text()
        URL: '//link[@rel="canonical"]/@href'
      Tags:
        Name: //div[h2[contains(.,'Categories')]]//a[@label="Category"]/text()
      Performers:
        Name: '//a[@label="Performer"]//text()'
        URL:
          selector: '//a[@label="Performer"]/@href'
          postProcess:
            # Prefix the href with the site origin.
            - replace:
                - regex: ^
                  with: "https://www.adultdvdempire.com"
  # Scraper for performer profile pages.
  performerScraper:
    common:
      # <ul> inside the #profileModal element; the "Label: value" items below
      # are read from its <li> children.
      $infoPiece: //*[@id="profileModal"]/div/div/div[2]/div[1]/ul
    performer:
      Name: //*[@id="content"]/section/div/div[2]/h1/text()
      Birthdate:
        selector: $infoPiece/li[contains(text(), 'Born:')]/text()
        postProcess:
          # Strip the "Born:" label, keeping only the value.
          - replace:
              - regex: Born:\s+(.*)
                with: $1
      Height:
        selector: $infoPiece/li[contains(text(), 'Height:')]/text()
        postProcess:
          # Strip the "Height:" label, then convert the value with feetToCm.
          - replace:
              - regex: Height:\s+(.*)
                with: $1
          - feetToCm: true
      # NOTE(review): the `a[1]|/img/@src` portion selects the <a> element and a
      # root-level <img>; it may have been intended as `a[1]/img/@src` — confirm
      # against a live page before changing.
      Image: //*[@id="content"]/section/div/div[1]/a[1]|/img/@src|//*[@id="content"]/section/div/div[1]/img/@src|//section[@class="container-fluid"]//a[@class="fancy"][@label="Headshot"]/@href
      Country:
        # Uses the shared $infoPiece fragment like the other profile fields.
        # A misspelt fragment name leaves stray characters in the expanded
        # XPath, so the selector would never match.
        selector: $infoPiece/li[contains(text(), 'From:')]/text()
        postProcess:
          # Strip the "From:" label, keeping only the value.
          - replace:
              - regex: From:\s+(.*)
                with: $1
      Measurements:
        selector: $infoPiece/li[contains(text(), 'Measurements:')]/text()
        postProcess:
          # Reduce the text to its three captured values joined with dashes.
          - replace:
              - regex: Measurements:\s+(\d\d\w*)\D+(\d+)\D+(\d+).*
                with: $1-$2-$3
      Aliases:
        selector: //*[@id="content"]/section/div/div[2]/div[contains(text(), "Alias:")]
        concat: ", "
        postProcess:
          # Strip the "Alias: " label, keeping only the value.
          - replace:
              - regex: "Alias: (.*)"
                with: $1
      Details: //*[@id="content"]/section/div/div[5]/aside/text()
      URL: //link[@rel='canonical']/@href
# Last Updated February 22, 2024