This commit is contained in:
Christoph Califice
2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions

View File

@@ -0,0 +1,260 @@
name: "Carnal+ / FTM+"
sceneByURL:
  # Group 1: studio sites that publish every scene as a standalone page.
  - action: scrapeXPath
    scraper: sceneScraper
    url:
      - americanmusclehunks.com/videos/
      - bangbangboys.com/videos/
      # NOTE(review): no trailing slash here, unlike every sibling entry;
      # confirm whether that is intentional.
      - cumdumpsluts.com/videos
      - dirtyboysociety.com/videos/
      - edwardjames.com/videos/
      - ftmmen.com/videos/
      - hungfuckers.com/videos/
      - jalifstudio.com/videos/
      - jasonsparkslive.com/videos/
      - jockbreeders.com/videos/
      - jockpussy.com/videos/
      - staghomme.com/videos/
      - teensandtwinks.com/videos/
      - twinks.com/videos/
  # Group 2: studio sites that arrange scenes into series made of chapters.
  # A site belongs here when its main navbar carries a "SERIES" link.
  - action: scrapeXPath
    scraper: chapterSceneScraper
    url:
      - boundtwinks.com/videos/
      - boyforsale.com/videos/
      - funsizeboys.com/videos/
      - gaycest.com/videos/
      - masonicboys.com/videos/
      - rawfuckboys.com/videos/
      - scoutboys.com/videos/
      - transcest.com/videos/
      - twinkloads.com/videos/
      - twinktop.com/videos/
  # Group 3: the network sites, which carry all scenes of the standalone sites.
  - action: scrapeXPath
    scraper: networkScraper
    url:
      - barebackplus.com/videos/
      - carnalplus.com/videos/
      - ftmplus.com/videos/
xPathScrapers:
  # Scraper for studio sites whose scenes are standalone pages.
  # Its fields carry anchors (&title, &details, &image, ...) that the
  # networkScraper and chapterSceneScraper below reuse through aliases.
  sceneScraper:
    common:
      $scene: &sceneContainer //body/div[contains(@class, "mainContainer")]
    scene:
      # The page <title> with everything from the first "|" onwards removed
      Title: &title
        selector: //title/text()
        postProcess:
          - replace:
              - regex: \s*\|.*$
                with: ""
      Details: &details
        selector: $scene//div[@class="full-txt"]//text()
        concat: "\n\n"
      # Union of the video poster and the hidden image's
      # src0_1x, src and data-src attributes
      Image: &image $scene//video/@poster | $scene//img[contains(@class, "hiddenImg")]/@src0_1x | $scene//img[contains(@class, "hiddenImg")]/@src | $scene//img[contains(@class, "hiddenImg")]/@data-src
      URL: &url //link[@rel="canonical"]/@href
      Date: &dateSubscraper
        # We need to scrape the network site to get the date, but this scraper
        # has to work for multiple networks so we can't hardcode the network
        # site. Instead we fetch the network name from the shortcut icon and
        # combine it with the canonical URL to construct the correct URL to
        # scrape. See https://regex101.com/r/QaZLIY/1 for an example.
        selector: //link[@rel="shortcut icon"]/@href | //link[@rel="canonical"]/@href
        concat: __SEPARATOR__
        postProcess:
          - replace:
              - regex: (?P<networkSite>.+\.com).*__SEPARATOR__.*(?P<path>\/videos.*).html
                # A literal "_" directly after $path would be read as part of
                # the group name, so the suffix "_vids" is written with the
                # URL-encoded underscore %5f instead.
                with: $networkSite$path%5fvids.html
          - subScraper: //div[@class="releasedate"]
          - replace: &cleanDate
              # Remove the trailing "| Full length video : XX min YY sec" part
              - regex: \s*\|.*
                with: ""
          - parseDate: January 02, 2006
      Code: &studioCode
        selector: //meta[@property="og:image"]/@content
        postProcess:
          - replace:
              - regex: .*content\/([^\/]+).*
                with: $1
              # Some of these image URLs will not contain the studio code,
              # so whatever is still a full URL at this point is blanked out
              - regex: ^https.*
                with: ""
      Studio: &studio
        Name:
          selector: //base/@href
          postProcess:
            - replace:
                # Reduce the base URL to the domain label in front of ".com"
                # https://regex101.com/r/JxFd9a/1
                - regex: ^(?:https:\/\/[\w\.]*?)([^.]+)\.com.*$
                  with: $1
            - map:
                # The canonical list of studio names is based on what the
                # studios are called on their respective network sites.
                # Every site routed to sceneScraper or chapterSceneScraper
                # needs an entry here, otherwise the bare domain label
                # would end up as the studio name.
                americanmusclehunks: American Muscle Hunks
                bangbangboys: Bang Bang Boys
                boundtwinks: Bound Twinks
                boyforsale: Boy For Sale
                cumdumpsluts: Cum Dump Sluts
                dirtyboysociety: Dirty Boy Society
                edwardjames: Edward James
                ftmmen: FTM Men
                funsizeboys: Funsize Boys
                gaycest: Gaycest
                hungfuckers: Hung Fuckers
                jalifstudio: Jalif Studio
                jasonsparkslive: Jason Sparks Live
                jockpussy: Jock Pussy
                jockbreeders: Jock Breeders
                masonicboys: Masonic Boys
                rawfuckboys: Raw Fuck Boys
                scoutboys: Scout Boys
                staghomme: Stag Homme
                teensandtwinks: Teens And Twinks
                transcest: Transcest
                twinks: Twinks
                twinkloads: Twink Loads
                twinktop: Twink Top
        URL: //base/@href
      Tags: &tags
        Name: $scene//div[@id="catMovie"]//text()
      Performers: &performers
        Name: $scene//div[contains(@class, "modelProfile")]//h2 | $scene//div[contains(@class, "modelProfile")]//h3
  # Scraper for the network sites. Title and Image are shared with
  # sceneScraper; every other field uses network-specific selectors.
  networkScraper:
    common:
      $scene: ((//div[contains(@class, "main")])[1]/div)[1]
    scene:
      Title: *title
      Details:
        # Text nodes of the description, skipping those whose parent is the
        # "firstWords" or "readmore" span
        selector: $scene//div[@class='textDescription']//text()[not(parent::span[@id='firstWords' or @id='readmore'])]
        concat: "\n\n"
      Image: *image
      Code:
        selector: (//source/@src)[1]
        postProcess:
          - replace:
              - regex: .*(\w{3}\d{4}).trailer.*
                with: $1
              # Some of these trailer URLs will not contain the studio code,
              # so whatever is still a full URL at this point is blanked out
              - regex: ^https.*
                with: ""
      Date:
        selector: $scene//div[@class="releasedate"]
        postProcess:
          - replace: *cleanDate
          - parseDate: January 02, 2006
      URL:
        # All scenes on network sites should be available from their subsites
        # as well, so we construct a valid link to the subsite both to
        # encourage people to scrape from the canonical source as well as
        # submitting both links to StashDB: the network site will list the
        # duration, which is helpful when evaluating the submitted
        # fingerprints for the scene.
        selector: //link[@rel="canonical"]/@href | //div[@class="logoSubsites"]//img/@alt
        concat: __SEPARATOR__
        postProcess:
          - replace:
              - regex: .*(?P<path>videos/.*)__SEPARATOR__(?P<domain>.*)
                with: https://$domain.com/$path
              - regex: _vids
                with: ""
              # This table should contain the same sites as the Studio Name
              # map in sceneScraper; it lowercases the subsite name that was
              # placed in the domain position.
              - regex: AmericanMuscleHunks
                with: americanmusclehunks
              - regex: BangBangBoys
                with: bangbangboys
              - regex: BoundTwinks
                with: boundtwinks
              - regex: BoyForSale
                with: boyforsale
              - regex: CumDumpSluts
                with: cumdumpsluts
              - regex: DirtyBoySociety
                with: dirtyboysociety
              - regex: EdwardJames
                with: edwardjames
              - regex: FTMmen
                with: ftmmen
              - regex: FunsizeBoys
                with: funsizeboys
              - regex: Gaycest
                with: gaycest
              - regex: HungFuckers
                with: hungfuckers
              - regex: JasonSparksLive
                with: jasonsparkslive
              - regex: JalifStudio
                with: jalifstudio
              - regex: JockBreeders
                with: jockbreeders
              - regex: JockPussy
                with: jockpussy
              - regex: MasonicBoys
                with: masonicboys
              - regex: RawFuckBoys
                with: rawfuckboys
              - regex: ScoutBoys
                with: scoutboys
              - regex: StagHomme
                with: staghomme
              - regex: TeensAndTwinks
                with: teensandtwinks
              - regex: Transcest
                with: transcest
              - regex: Twinks
                with: twinks
              - regex: Twinkloads
                with: twinkloads
              - regex: TwinkTop
                with: twinktop
      Tags:
        Name: $scene//div[@class="update_tags"]//text()
      Performers:
        Name: $scene//div[@id="models"]//h4
      Studio:
        Name:
          selector: //div[@class="logoSubsites"]//img/@alt
          postProcess:
            - replace:
                # Turn PascalCaseWords to Pascal Case Words
                - regex: ([a-z])([A-Z])
                  with: $1 $2
                # FTMmen and Twinkloads (spelled as in the URL table above)
                # have no lower-to-upper boundary where their words split, so
                # they are aligned with sceneScraper's studio names explicitly.
                # NOTE(review): assumes the alt text uses exactly these
                # spellings; confirm against the live network pages.
                - regex: ^FTMmen$
                  with: FTM Men
                - regex: ^Twinkloads$
                  with: Twink Loads
        URL:
          selector: //div[@class="logoSubsites"]//img/@alt
          postProcess:
            - replace:
                - regex: (?P<domain>.*)
                  with: https://$domain.com
  # Scraper for studio sites that group scenes into series with chapters.
  chapterSceneScraper:
    common:
      $scene: *sceneContainer
    # The differences from the regular sceneScraper are that we shuffle the
    # title around a little to reflect what's shown on the page instead of in
    # the title bar (which also makes scenes easier to sort), and that we use
    # the series as a movie so that people can group their scenes together.
    scene:
      Title:
        selector: //title/text()
        postProcess:
          - replace:
              # "<title> - <series> - <chapter> | ..." is reordered to
              # "<series> - <chapter> - <title>"
              # https://regex101.com/r/y1Clkp/2
              - regex: (?P<title>.*?) - (?P<series>.*?) - (?P<chapter>.*?) \|.*
                with: $series - $chapter - $title
      Details: *details
      Date: *dateSubscraper
      Code: *studioCode
      Image: *image
      URL: *url
      Studio: *studio
      Movies:
        Name: //span[contains(@class, "dvdTitleScene")]
        URL: //span[contains(@class, "dvdTitleScene")]//a/@href
      Tags: *tags
      Performers: *performers
# Last Updated September 21, 2023

View File

@@ -0,0 +1,9 @@
# Package manifest for the CarnalPlus scraper.
id: 'CarnalPlus'
name: 'Carnal+ / FTM+'
metadata: {}
# Quoted so the value is always read as a string, whatever characters it holds.
version: 'e237016'
date: '2024-03-25 01:41:58'
requires: []
source_repository: 'https://stashapp.github.io/CommunityScrapers/stable/index.yml'
# The only file listed for this package.
files:
  - 'CarnalPlus.yml'