name: Carnal+ / FTM+
# Scene scraper definition for the Carnal+ / FTM+ sites listed below.
#
# Three XPath scrapers are defined, one per group of URLs in sceneByURL:
#   sceneScraper        - studio sites with standalone scenes
#   chapterSceneScraper - studio sites that group scenes into series/chapters
#   networkScraper      - the network sites
#
# Anchors (&name) declared in sceneScraper are re-used through aliases (*name)
# by the other two scrapers, so sceneScraper must stay first in xPathScrapers.
sceneByURL:
  # These studios have standalone scenes
  - action: scrapeXPath
    url:
      - americanmusclehunks.com/videos/
      - bangbangboys.com/videos/
      - cumdumpsluts.com/videos
      - dirtyboysociety.com/videos/
      - edwardjames.com/videos/
      - ftmmen.com/videos/
      - hungfuckers.com/videos/
      - jalifstudio.com/videos/
      - jasonsparkslive.com/videos/
      - jockbreeders.com/videos/
      - jockpussy.com/videos/
      - staghomme.com/videos/
      - teensandtwinks.com/videos/
      - twinks.com/videos/
    scraper: sceneScraper
  # These studios organize their scenes into series with chapters
  # You can tell a studio belongs in this category
  # if there's a "SERIES" link in the main navbar for their site
  - action: scrapeXPath
    url:
      - boundtwinks.com/videos/
      - boyforsale.com/videos/
      - funsizeboys.com/videos/
      - gaycest.com/videos/
      - masonicboys.com/videos/
      - rawfuckboys.com/videos/
      - scoutboys.com/videos/
      - transcest.com/videos/
      - twinkloads.com/videos/
      - twinktop.com/videos/
    scraper: chapterSceneScraper
  # The network site has all scenes from the standalone sites
  - action: scrapeXPath
    url:
      - barebackplus.com/videos/
      - carnalplus.com/videos/
      - ftmplus.com/videos/
    scraper: networkScraper

xPathScrapers:
  sceneScraper:
    common:
      $scene: &sceneContainer //body/div[contains(@class, "mainContainer")]
    scene:
      Title: &title
        selector: //title/text()
        postProcess:
          - replace:
              # Drop everything from the first "|" onwards
              - regex: \s*\|.*$
                with: ""
      Details: &details
        selector: $scene//div[@class="full-txt"]//text()
        concat: "\n\n"
      # Folded scalar: the lines below are joined with single spaces
      # into one XPath union expression
      Image: &image >-
        $scene//video/@poster
        | $scene//img[contains(@class, "hiddenImg")]/@src0_1x
        | $scene//img[contains(@class, "hiddenImg")]/@src
        | $scene//img[contains(@class, "hiddenImg")]/@data-src
      URL: &url //link[@rel="canonical"]/@href
      Date: &dateSubscraper
        # We need to scrape the network site to get the date, but this scraper
        # has to work for multiple networks so we can't hardcode the network site.
        # Instead we fetch the network name from the shortcut icon and combine it
        # with the canonical URL to construct the correct URL to scrape
        # see https://regex101.com/r/QaZLIY/1 for an example
        selector: //link[@rel="shortcut icon"]/@href | //link[@rel="canonical"]/@href
        concat: __SEPARATOR__
        postProcess:
          - replace:
              # The named groups are referenced as $networkSite and $path below
              - regex: (?P<networkSite>.+\.com).*__SEPARATOR__.*(?P<path>\/videos.*).html
                # We'd love to append `_vids` here but _ triggers the submatch in the regexp
                # so we use the URL encoded version of an underscore instead: %5f
                with: $networkSite$path%5fvids.html
          - subScraper: //div[@class="releasedate"]
          - replace: &cleanDate
              # Remove the trailing "| Full length video : XX min YY sec" part
              - regex: \s*\|.*
                with: ""
          - parseDate: January 02, 2006
      Code: &studioCode
        selector: //meta[@property="og:image"]/@content
        postProcess:
          - replace:
              - regex: .*content\/([^\/]+).*
                with: $1
              # Some of these image URLs will not contain the studio code
              # so we need to remove those manually here
              - regex: ^https.*
                with: ""
      Studio: &studio
        Name:
          selector: //base/@href
          postProcess:
            - replace:
                # https://regex101.com/r/JxFd9a/1
                - regex: ^(?:https:\/\/[\w\.]*?)([^.]+)\.com.*$
                  with: $1
            - map:
                # The canonical list of studio names are based on what
                # they are called on their respective network sites.
                # Every site handled by sceneScraper or chapterSceneScraper
                # in sceneByURL should have an entry here.
                americanmusclehunks: American Muscle Hunks
                bangbangboys: Bang Bang Boys
                boundtwinks: Bound Twinks
                boyforsale: Boy For Sale
                cumdumpsluts: Cum Dump Sluts
                dirtyboysociety: Dirty Boy Society
                edwardjames: Edward James
                ftmmen: FTM Men
                funsizeboys: Funsize Boys
                gaycest: Gaycest
                hungfuckers: Hung Fuckers
                jalifstudio: Jalif Studio
                jasonsparkslive: Jason Sparks Live
                jockbreeders: Jock Breeders
                jockpussy: Jock Pussy
                masonicboys: Masonic Boys
                rawfuckboys: Raw Fuck Boys
                scoutboys: Scout Boys
                staghomme: Stag Homme
                teensandtwinks: Teens And Twinks
                transcest: Transcest
                twinks: Twinks
                twinkloads: Twink Loads
                twinktop: Twink Top
        URL: //base/@href
      Tags: &tags
        Name: $scene//div[@id="catMovie"]//text()
      Performers: &performers
        Name: >-
          $scene//div[contains(@class, "modelProfile")]//h2
          | $scene//div[contains(@class, "modelProfile")]//h3

  networkScraper:
    common:
      $scene: ((//div[contains(@class, "main")])[1]/div)[1]
    scene:
      Title: *title
      Details:
        selector: $scene//div[@class='textDescription']//text()[not(parent::span[@id='firstWords' or @id='readmore'])]
        concat: "\n\n"
      Image: *image
      Code:
        selector: (//source/@src)[1]
        postProcess:
          - replace:
              - regex: .*(\w{3}\d{4}).trailer.*
                with: $1
              # Some of these trailer URLs will not contain the studio code
              # so we need to remove those manually here
              - regex: ^https.*
                with: ""
      Date:
        selector: $scene//div[@class="releasedate"]
        postProcess:
          - replace: *cleanDate
          - parseDate: January 02, 2006
      URL:
        # All scenes on network sites should be available from their subsites as well
        # so we construct a valid link to the subsite both to encourage people
        # to scrape from the canonical source as well as submitting both links to StashDB:
        # the network site will list the duration, which is helpful when evaluating
        # the submitted fingerprints for the scene
        selector: //link[@rel="canonical"]/@href | //div[@class="logoSubsites"]//img/@alt
        concat: __SEPARATOR__
        postProcess:
          - replace:
              # The named groups are referenced as $domain and $path below
              - regex: .*(?P<path>videos/.*)__SEPARATOR__(?P<domain>.*)
                with: https://$domain.com/$path
              - regex: _vids
                with: ""
              # This table should contain the same sites as the
              # Studio Name map in sceneScraper
              - regex: AmericanMuscleHunks
                with: americanmusclehunks
              - regex: BangBangBoys
                with: bangbangboys
              - regex: BoundTwinks
                with: boundtwinks
              - regex: BoyForSale
                with: boyforsale
              - regex: CumDumpSluts
                with: cumdumpsluts
              - regex: DirtyBoySociety
                with: dirtyboysociety
              - regex: EdwardJames
                with: edwardjames
              - regex: FTMmen
                with: ftmmen
              - regex: FunsizeBoys
                with: funsizeboys
              - regex: Gaycest
                with: gaycest
              - regex: HungFuckers
                with: hungfuckers
              - regex: JasonSparksLive
                with: jasonsparkslive
              - regex: JalifStudio
                with: jalifstudio
              - regex: JockBreeders
                with: jockbreeders
              - regex: JockPussy
                with: jockpussy
              - regex: MasonicBoys
                with: masonicboys
              - regex: RawFuckBoys
                with: rawfuckboys
              - regex: ScoutBoys
                with: scoutboys
              - regex: StagHomme
                with: staghomme
              - regex: TeensAndTwinks
                with: teensandtwinks
              - regex: Transcest
                with: transcest
              - regex: Twinks
                with: twinks
              - regex: Twinkloads
                with: twinkloads
              - regex: TwinkTop
                with: twinktop
      Tags:
        Name: $scene//div[@class="update_tags"]//text()
      Performers:
        Name: $scene//div[@id="models"]//h4
      Studio:
        Name:
          selector: //div[@class="logoSubsites"]//img/@alt
          postProcess:
            - replace:
                # Turn PascalCaseWords to Pascal Case Words
                - regex: ([a-z])([A-Z])
                  with: $1 $2
        URL:
          selector: //div[@class="logoSubsites"]//img/@alt
          postProcess:
            - replace:
                # The named group is referenced as $domain below
                - regex: (?P<domain>.*)
                  with: https://$domain.com

  chapterSceneScraper:
    common:
      $scene: *sceneContainer
    # The differences from the regular sceneScraper is that we
    # shuffle the title around a little to reflect what's shown
    # on the page instead of in the title bar (also makes them easier to sort)
    # and we use the series as a movie so that people can group their scenes together
    scene:
      Title:
        selector: //title/text()
        postProcess:
          - replace:
              # https://regex101.com/r/y1Clkp/2
              # Reorders "title - series - chapter | ..." into
              # "series - chapter - title"
              - regex: (?P<title>.*?) - (?P<series>.*?) - (?P<chapter>.*?) \|.*
                with: $series - $chapter - $title
      Details: *details
      Date: *dateSubscraper
      Code: *studioCode
      Image: *image
      URL: *url
      Studio: *studio
      Movies:
        Name: //span[contains(@class, "dvdTitleScene")]
        URL: //span[contains(@class, "dvdTitleScene")]//a/@href
      Tags: *tags
      Performers: *performers
# Last Updated September 21, 2023