compose-projects-arr/stash/config/scrapers/community/AdultSiteRunner/AdultSiteRunner.yml

# NOTE(review): the name mentions only one of the sites routed below.
name: Raunchy Bastards
# Scene-by-URL routing: every entry pairs a list of URL fragments with the
# name of an XPath scraper defined under `xPathScrapers`.
sceneByURL:
  # Sites scraped with the `oldStyleSite` definition.
  - action: scrapeXPath
    url:
      - boundjocks.com/scene/
      - boyshalfwayhouse.com/scene/
      - coltstudiogroup.com/scene/
      - daddycarl.com/scene/
      - hotoldermale.com/scene/
      - monstercub.com/scene/
      - naturalbornbreeders.com/scene/
      - older4me.com/scene/
      - raunchybastards.com/scene/
      - stockydudes.com/scene/
      - toplatindaddies.com/scene/
    scraper: oldStyleSite
  # Sites scraped with the `newStyleSite` definition.
  - action: scrapeXPath
    url:
      - blackboyaddictionz.com/scene/
      - blacksondaddies.com/scene/
      - myfirstdaddy.com/scene/
      - playdaddy.com/scene/
    scraper: newStyleSite
xPathScrapers:
  # Definition referenced by the first `sceneByURL` group. Every scene field
  # is looked up relative to the "sceneContainer" div ($scene).
  oldStyleSite:
    common:
      $scene: //div[contains(@class, "sceneContainer")]
    scene:
      Title: $scene/div[@class="sceneTitle"]
      # Keep only the digits of the big image wrapper's id attribute.
      Code:
        selector: $scene//div[contains(@class, "sceneImgBig")]/@id
        postProcess:
          - replace:
              # Delete every run of non-digit characters. The pattern has no
              # capture group, so the replacement is an explicit empty string.
              - regex: \D+
                with: ""
      Date:
        selector: $scene//span[contains(@class, "sceneDetails")]
        # Anchored as `ppDate`; newStyleSite reuses these steps via alias.
        postProcess: &ppDate
          - replace:
              # https://regex101.com/r/rsjbb6/3
              # Drops an optional "Details:" prefix and any trailing text,
              # leaving "<Mon D>, <YYYY>" for the parseDate step below.
              - regex: ^(?:Details:\s*)?(\w{3}\s*\d{1,2}),\s*(\d{4}).*?$
                with: $1, $2
          - parseDate: Jan 2, 2006
      # All of this can be replaced once scrapers get access to the URL they are scraping
      Studio:
        Name:
          # Anchored as `image`: this union of image-URL candidates is also
          # the selector for Studio.URL and Image below.
          selector: &image >
            $scene//video/@poster
            | $scene//div[contains(@class, "sceneImgBig")]/img/@src
            | //div[contains(@style, "background-image")]/@style
            | //*[contains(@class, "videoTrailer") or contains(@class, "bgScene")]//@srcset
          # Anchored as `studioNameFromURL` (newStyleSite reuses it): reduce
          # the URL to the label right before ".com", then map that label to
          # the studio's display name.
          # NOTE(review): the pattern only matches values that start with
          # "https://". A style attribute selected by the third branch above
          # would not match and would pass through unchanged - confirm
          # whether that branch can supply the value here.
          postProcess: &studioNameFromURL
            - replace:
                - regex: ^(?:https:\/\/[\w\.]*?)([^.]+)\.com.*$
                  with: $1
            - map:
                blackboyaddictionz: Black Boy Addictionz
                blacksondaddies: Blacks on Daddies
                boundjocks: Bound Jocks
                boyshalfwayhouse: Boys Halfway House
                coltstudiogroup: Colt Studio Group
                daddycarl: Daddy Carl
                hotoldermale: Hot Older Male
                monstercub: Monster Cub
                myfirstdaddy: My First Daddy
                naturalbornbreeders: Natural Born Breeders
                older4me: Older4Me
                playdaddy: Play Daddy
                raunchybastards: Raunchy Bastards
                # Key must equal the domain label of stockydudes.com (see the
                # sceneByURL list); it was misspelled and could never match.
                stockydudes: Stocky Dudes
                toplatindaddies: Top Latin Daddies
        URL:
          selector: *image
          postProcess:
            - replace:
                # Same pattern as the studio name, but rebuild the site root.
                - regex: ^(?:https:\/\/[\w\.]*?)([^.]+)\.com.*$
                  with: https://$1.com
      # Two alternative performer markups are queried: links inside
      # "scenePerformers", or "scenePerf" blocks carrying a data-href.
      Performers:
        Name: >
          $scene//div[contains(@class, "scenePerformers")]/a
          | $scene//div[@class="scenePerf"]/span[@class="perfName"]
        URL: >
          $scene//div[contains(@class, "scenePerformers")]/a/@href
          | $scene//div[@class="scenePerf"]/@data-href
      Tags:
        Name: $scene//a[@class="sceneTagsLnk"]/text()
      # Text nodes of the description are joined with a blank line.
      Details:
        selector: $scene//div[contains(@class, "sceneDescription")]/text()
        concat: "\n\n"
      Image:
        selector: *image
        postProcess:
          - replace:
              # Unwrap a CSS url("...") value taken from a style attribute.
              - regex: .*url\("(.*)"\).*
                with: $1
              # Remove a trailing "2x" (as found in srcset values).
              - regex: \s*2x$
                with: ""

  # Definition referenced by the second `sceneByURL` group. Most fields are
  # looked up inside the "container_styled_1" div ($details); title, code,
  # studio and image come from page-level elements instead.
  newStyleSite:
    common:
      $details: //div[contains(@class, "container_styled_1")]
    scene:
      Title: //h2[@class="main_title"]
      # Keep only the digits of the canonical URL.
      # NOTE(review): digits anywhere in the URL (e.g. in a title slug) are
      # all kept and run together - confirm the scene id is the only number.
      Code:
        selector: //link[@rel="canonical"]/@href
        postProcess:
          - replace:
              # Delete every run of non-digit characters. The pattern has no
              # capture group, so the replacement is an explicit empty string.
              - regex: \D+
                with: ""
      # All of this can be replaced once scrapers get access to the URL they are scraping
      Studio:
        Name:
          selector: //link[@rel="canonical"]/@href
          # Alias of the steps anchored under oldStyleSite's Studio.Name.
          postProcess: *studioNameFromURL
        URL:
          selector: //link[@rel="canonical"]/@href
          postProcess:
            - replace:
                # Reduce the canonical URL to "https://<label>.com".
                - regex: ^(?:https:\/\/[\w\.]*?)([^.]+)\.com.*$
                  with: https://$1.com
      Performers:
        Name: $details//span[contains(@class, "perfImage")]/a
        URL: $details//span[contains(@class, "perfImage")]/a/@href
      # Paragraph text nodes are joined with a blank line.
      Details:
        selector: $details//p/text()
        concat: "\n\n"
      # First text node of the h5 containing "Details"; parsed with the date
      # steps anchored as `ppDate` under oldStyleSite.
      Date:
        selector: ($details//h5[contains(text(), "Details")]/text())[1]
        postProcess: *ppDate
      Image:
        selector: //meta[@property="og:image"]/@content
      Tags:
        Name: $details//h5[contains(., "Categories")]/a/text()
# Last Updated September 22, 2023