# compose-projects-arr/stash/config/scrapers/community/hussiemodels/hussiemodels.yml

# Scraper identifier.
name: hussiemodels
# Scene URLs handled by this scraper. Each entry is a URL fragment to match;
# all of them are routed to the shared hussieScraper XPath definition.
sceneByURL:
  - action: scrapeXPath
    url:
      - hookuphotshot.com/trailers/
      - hotandtatted.com/trailers/
      - hussiepass.com/trailers/
      - seehimfuck.com/trailers/
      - seehimsolo.com/trailers/
      # Listed without a /trailers/ path, so any povpornstars.com URL matches.
      - povpornstars.com
    scraper: hussieScraper
# Movie (DVD) URLs handled by this scraper. Only the three sites listed here
# have a /dvds/ entry; they use the same hussieScraper XPath definition as
# the scene URLs.
movieByURL:
  - action: scrapeXPath
    url:
      - hussiepass.com/dvds/
      - seehimfuck.com/dvds/
      - seehimsolo.com/dvds/
    scraper: hussieScraper
xPathScrapers:
  # Single XPath scraper used by every sceneByURL / movieByURL entry.
  hussieScraper:
    common:
      # Reusable XPath fragment for the scene preview <img>; selectors can
      # reference it as $image.
      $image: //img[contains(@class, "update_thumb") or contains(@class, "trailer-thumb")]
    movie:
      Name: //div[contains(@class, "profile-details")]/h3
      # Anchored (&dvdSynopsis) so the scene section can reuse it as a subScraper.
      Synopsis: &dvdSynopsis //div[contains(@class, "profile-about")]/p/text()
      # Anchored (&dvdCover) so the scene section can reuse it as a subScraper.
      FrontImage: &dvdCover
        # Selects the page's <base href> together with every available
        # src0_Nx attribute of the DVD box image. The matches are joined with
        # __SEPARATOR__ and then reduced to "<base href><first image path>".
        selector: >-
          //head/base/@href |
          //img[contains(@class, "dvd_box")]/@src0_4x |
          //img[contains(@class, "dvd_box")]/@src0_3x |
          //img[contains(@class, "dvd_box")]/@src0_2x |
          //img[contains(@class, "dvd_box")]/@src0_1x
        concat: __SEPARATOR__
        postProcess:
          - replace:
              # $1 = base href, $2 = first image path; any further candidates
              # are discarded. The trailing group is optional so that a page
              # exposing only one src0_Nx attribute (base + one path, hence a
              # single separator) still collapses to a clean URL instead of
              # leaking the literal __SEPARATOR__ into the result.
              - regex: ^(.*?)__SEPARATOR__(.*?)(?:__SEPARATOR__.*)?$
                with: $1$2
      # Anchored (&studio) so the scene section can alias the same mapping.
      Studio: &studio
        Name:
          selector: //head/base/@href
          postProcess:
            - replace:
                # Reduce the base URL to the domain label directly before
                # ".com", skipping one optional subdomain label (e.g. "www.").
                - regex: ^https://(?:[^.]+\.)?([^.]+)\.com/.*
                  with: $1
            # Translate the bare domain label into the studio's display name.
            - map:
                hookuphotshot: HookUp Hotshot
                hotandtatted: Hot and Tatted
                hussiepass: Hussie Pass
                seehimfuck: See Him Fuck
                seehimsolo: See Him Solo
                povpornstars: POV Pornstars
    # Field selectors applied to a scene (trailer) page.
    scene:
      # Any direct child of the videoDetails div whose element name starts
      # with "h" (in practice a heading tag such as h1-h6).
      Title: //div[contains(@class, "videoDetails")]/*[starts-with(name(), 'h')]
      Date:
        selector: //div[contains(@class, "videoInfo")]/p[contains(span, "Date Added:")]/text()
        postProcess:
          # Only HookupHotshot still uses this format but the others use YYYY-MM-DD
          # so they'll fall through and still be correct wrt. expected Stash date format
          - parseDate: January 2, 2006
      # Same studio lookup as the movie section (alias of the &studio anchor).
      Studio: *studio
      Performers:
        Name: //li[contains(@class, "update_models")]//a
      Tags:
        Name: //div[contains(@class, "featuring")]//a[contains(@href, "/categories/")]
      # DVDs linked from the scene page. Name and URL come from the link
      # itself; cover and synopsis are fetched from the linked DVD page by
      # reusing the movie selectors as subScrapers.
      Movies:
        Name: //div[contains(@class, "featuring")]//a[contains(@href, "/dvds/")]
        URL: //div[contains(@class, "featuring")]//a[contains(@href, "/dvds/")]/@href
        FrontImage:
          selector: //div[contains(@class, "featuring")]//a[contains(@href, "/dvds/")]/@href
          postProcess:
            - subScraper: *dvdCover
        Synopsis:
          selector: //div[contains(@class, "featuring")]//a[contains(@href, "/dvds/")]/@href
          postProcess:
            - subScraper: *dvdSynopsis
        # this fails but I think it's a bug in Stash related to scraping more than one level of nesting
        # Studio: *studio
      Details:
        selector: //div[contains(@class, "videoDetails")]/p
        postProcess:
          - replace:
              # Where the <br> elements used to be
              # Inserts a blank line wherever '!', '.' or '?' is immediately
              # followed by a character that is neither whitespace nor one of
              # those three. NOTE(review): this also splits text such as
              # "3.5" or "example.com" - confirm that is acceptable.
              - regex: ([!.?])([^!.?\s])
                with: "$1\n\n$2"
      Image:
        selector: //meta[@property="og:image"]/@content
        postProcess:
          - replace:
              # Swap the 1x image variant for the 4x one. The pattern is
              # unanchored, so every "1x" occurrence in the URL is rewritten.
              - regex: 1x
                with: 4x
        # Some people prefer the higher res scene preview images
        # selector: >-
        #   $image/@src0_4x |
        #   $image/@src0_3x |
        #   $image/@src0_2x |
        #   $image/@src0_1x
# Last Updated January 10, 2024