stash

2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions
--- a/stash/config/scrapers/community/CarnalPlus/CarnalPlus.yml
+++ b/stash/config/scrapers/community/CarnalPlus/CarnalPlus.yml
@@ -0,0 +1,260 @@
+name: Carnal+ / FTM+
+sceneByURL:
+  # Series/chapter studios (their site has a "SERIES" link in the main navbar).
+  # Keep this entry first: URL fragments match as substrings, so the
+  # "twinks.com/videos/" fragment further down also matches boundtwinks.com.
+  - action: scrapeXPath
+    url:
+      - boundtwinks.com/videos/
+      - boyforsale.com/videos/
+      - funsizeboys.com/videos/
+      - gaycest.com/videos/
+      - masonicboys.com/videos/
+      - rawfuckboys.com/videos/
+      - scoutboys.com/videos/
+      - transcest.com/videos/
+      - twinkloads.com/videos/
+      - twinktop.com/videos/
+    scraper: chapterSceneScraper
+  # These studios have standalone scenes
+  - action: scrapeXPath
+    url:
+      - americanmusclehunks.com/videos/
+      - bangbangboys.com/videos/
+      - cumdumpsluts.com/videos/
+      - dirtyboysociety.com/videos/
+      - edwardjames.com/videos/
+      - ftmmen.com/videos/
+      - hungfuckers.com/videos/
+      - jalifstudio.com/videos/
+      - jasonsparkslive.com/videos/
+      - jockbreeders.com/videos/
+      - jockpussy.com/videos/
+      - staghomme.com/videos/
+      - teensandtwinks.com/videos/
+      - twinks.com/videos/
+    scraper: sceneScraper
+  # The network site has all scenes from the standalone sites
+  - action: scrapeXPath
+    url:
+      - barebackplus.com/videos/
+      - carnalplus.com/videos/
+      - ftmplus.com/videos/
+    scraper: networkScraper
+xPathScrapers:
+  sceneScraper:
+    common:
+      $scene: &sceneContainer //body/div[contains(@class, "mainContainer")]
+    scene:
+      Title: &title  # page <title> with everything from the first "|" onwards removed
+        selector: //title/text()
+        postProcess:
+          - replace:
+              - regex: \s*\|.*$
+                with: ""
+      Details: &details  # text nodes of the "full-txt" div, joined by blank lines
+        selector: $scene//div[@class="full-txt"]//text()
+        concat: "\n\n"
+      Image: &image $scene//video/@poster | $scene//img[contains(@class, "hiddenImg")]/@src0_1x | $scene//img[contains(@class, "hiddenImg")]/@src | $scene//img[contains(@class, "hiddenImg")]/@data-src
+      URL: &url //link[@rel="canonical"]/@href
+      Date: &dateSubscraper
+        # We need to scrape the network site to get the date, but this scraper
+        # has to work for multiple networks so we can't hardcode the network site.
+        # Instead we fetch the network name from the shortcut icon and combine it
+        # with the canonical URL to construct the correct URL to scrape
+        # see https://regex101.com/r/QaZLIY/1 for an example
+        selector: //link[@rel="shortcut icon"]/@href | //link[@rel="canonical"]/@href
+        concat: __SEPARATOR__
+        postProcess:
+          - replace:
+              - regex: (?P<networkSite>.+\.com).*__SEPARATOR__.*(?P<path>\/videos.*)\.html
+                # We'd love to append `_vids` here but _ triggers the submatch in the regexp
+                # so we use the URL encoded version of an underscore instead: %5f
+                with: $networkSite$path%5fvids.html
+          - subScraper: //div[@class="releasedate"]
+          # Remove the trailing "| Full length video : XX min YY sec" part
+          - replace: &cleanDate
+              - regex: \s*\|.*
+                with: ""
+          - parseDate: January 02, 2006
+      Code: &studioCode  # studio code, read out of the og:image URL
+        selector: //meta[@property="og:image"]/@content
+        postProcess:
+          - replace:
+              - regex: .*content\/([^\/]+).*  # keep the path segment after the last "content/"
+                with: $1
+              # Image URLs without a "content/" segment are left untouched by the rule
+              # above, so anything that still starts with https is blanked out here
+              - regex: ^https.*
+                with: ""
+      Studio: &studio
+        Name:
+          selector: //base/@href
+          postProcess:
+            - replace:
+                # https://regex101.com/r/JxFd9a/1
+                - regex: ^(?:https:\/\/[\w\.]*?)([^.]+)\.com.*$
+                  with: $1
+            - map:
+                # Canonical studio names follow what each studio is called on its network site
+                americanmusclehunks: American Muscle Hunks
+                bangbangboys: Bang Bang Boys
+                boundtwinks: Bound Twinks
+                boyforsale: Boy For Sale
+                cumdumpsluts: Cum Dump Sluts
+                dirtyboysociety: Dirty Boy Society
+                edwardjames: Edward James
+                ftmmen: FTM Men
+                funsizeboys: Funsize Boys
+                gaycest: Gaycest
+                hungfuckers: Hung Fuckers
+                jalifstudio: Jalif Studio
+                jasonsparkslive: Jason Sparks Live
+                jockbreeders: Jock Breeders
+                jockpussy: Jock Pussy
+                masonicboys: Masonic Boys
+                rawfuckboys: Raw Fuck Boys
+                scoutboys: Scout Boys
+                staghomme: Stag Homme
+                teensandtwinks: Teens And Twinks
+                transcest: Transcest
+                twinkloads: Twink Loads
+                twinks: Twinks
+                twinktop: Twink Top
+        URL: //base/@href
+      Tags: &tags  # tag names: text nodes under the "catMovie" div of the scene container
+        Name: $scene//div[@id="catMovie"]//text()
+      Performers: &performers  # performer names: h2/h3 headings inside "modelProfile" divs
+        Name: $scene//div[contains(@class, "modelProfile")]//h2 | $scene//div[contains(@class, "modelProfile")]//h3
+  networkScraper:
+    common:
+      $scene: ((//div[contains(@class, "main")])[1]/div)[1]
+    scene:
+      Title: *title
+      Details:
+        selector: $scene//div[@class='textDescription']//text()[not(parent::span[@id='firstWords' or @id='readmore'])]
+        concat: "\n\n"
+      Image: *image
+      Code:  # studio code, read out of the first <source> URL on the page
+        selector: (//source/@src)[1]
+        postProcess:
+          - replace:
+              - regex: .*(\w{3}\d{4}).trailer.*
+                with: $1
+              # Trailer URLs without a studio code are left untouched by the rule
+              # above, so anything that still starts with https is blanked out here
+              - regex: ^https.*
+                with: ""
+      Date:
+        selector: $scene//div[@class="releasedate"]
+        postProcess:
+          - replace: *cleanDate
+          - parseDate: January 02, 2006
+      URL:
+        # All scenes on network sites should be available from their subsites as well,
+        # so we construct a valid link to the subsite, both to encourage people
+        # to scrape from the canonical source and to get both links submitted to StashDB:
+        # the network site will list the duration, which is helpful when evaluating
+        # the submitted fingerprints for the scene
+        selector: //link[@rel="canonical"]/@href | //div[@class="logoSubsites"]//img/@alt
+        concat: __SEPARATOR__
+        postProcess:
+          - replace:
+              - regex: .*(?P<path>videos/.*)__SEPARATOR__(?P<domain>.*)
+                with: https://$domain.com/$path
+              - regex: _vids
+                with: ""
+              # Lowercase the site name; keep in sync with the Studio Name map in sceneScraper
+              - regex: AmericanMuscleHunks
+                with: americanmusclehunks
+              - regex: BangBangBoys
+                with: bangbangboys
+              - regex: BoundTwinks
+                with: boundtwinks
+              - regex: BoyForSale
+                with: boyforsale
+              - regex: CumDumpSluts
+                with: cumdumpsluts
+              - regex: DirtyBoySociety
+                with: dirtyboysociety
+              - regex: EdwardJames
+                with: edwardjames
+              - regex: FTMmen
+                with: ftmmen
+              - regex: FunsizeBoys
+                with: funsizeboys
+              - regex: Gaycest
+                with: gaycest
+              - regex: HungFuckers
+                with: hungfuckers
+              - regex: JasonSparksLive
+                with: jasonsparkslive
+              - regex: JalifStudio
+                with: jalifstudio
+              - regex: JockBreeders
+                with: jockbreeders
+              - regex: JockPussy
+                with: jockpussy
+              - regex: MasonicBoys
+                with: masonicboys
+              - regex: RawFuckBoys
+                with: rawfuckboys
+              - regex: ScoutBoys
+                with: scoutboys
+              - regex: StagHomme
+                with: staghomme
+              - regex: TeensAndTwinks
+                with: teensandtwinks
+              - regex: Transcest
+                with: transcest
+              - regex: Twinks
+                with: twinks
+              - regex: Twinkloads
+                with: twinkloads
+              - regex: TwinkTop
+                with: twinktop
+      Tags:
+        Name: $scene//div[@class="update_tags"]//text()
+      Performers:
+        Name: $scene//div[@id="models"]//h4
+      Studio:
+        Name:
+          selector: //div[@class="logoSubsites"]//img/@alt
+          postProcess:
+            - replace:
+                - regex: ([a-z])([A-Z])  # Turn PascalCaseWords into Pascal Case Words
+                  with: $1 $2
+            - map: {FTMmen: FTM Men, Twinkloads: Twink Loads}  # unsplittable alts; names as in sceneScraper
+        URL:
+          selector: //div[@class="logoSubsites"]//img/@alt
+          postProcess:
+            - replace:
+                - regex: (?P<domain>.*)
+                  with: https://$domain.com
+  chapterSceneScraper:
+    # Differs from the regular sceneScraper in two ways: the title is
+    # rearranged to reflect what the page shows instead of the title bar
+    # (which also makes scenes easier to sort), and the series is used
+    # as a movie so that people can group their scenes together
+    common:
+      $scene: *sceneContainer
+    scene:
+      Title:
+        selector: //title/text()
+        postProcess:
+          - replace:
+              # https://regex101.com/r/y1Clkp/2
+              - regex: (?P<title>.*?) - (?P<series>.*?) - (?P<chapter>.*?) \|.*
+                with: $series - $chapter - $title
+      Details: *details
+      Image: *image
+      URL: *url
+      Date: *dateSubscraper
+      Code: *studioCode
+      Studio: *studio
+      Tags: *tags
+      Performers: *performers
+      Movies:
+        Name: //span[contains(@class, "dvdTitleScene")]
+        URL: //span[contains(@class, "dvdTitleScene")]//a/@href
+# Last Updated September 21, 2023
--- a/stash/config/scrapers/community/CarnalPlus/manifest
+++ b/stash/config/scrapers/community/CarnalPlus/manifest
@@ -0,0 +1,9 @@
+id: CarnalPlus
+name: Carnal+ / FTM+
+metadata: {}
+version: 'e237016'  # quoted so a revision hash is always read as a string
+date: '2024-03-25 01:41:58'
+requires: []
+source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
+files:
+- CarnalPlus.yml