135 lines
4.6 KiB
YAML
135 lines
4.6 KiB
YAML
# Scraper definition covering the Raunchy Bastards family of sites.
name: Raunchy Bastards

# Scene-by-URL entries: each entry names the XPath scraper definition
# (declared under xPathScrapers) that handles the listed URL fragments.
sceneByURL:
  # Sites handled by the "oldStyleSite" scraper definition.
  - action: scrapeXPath
    scraper: oldStyleSite
    url:
      - boundjocks.com/scene/
      - boyshalfwayhouse.com/scene/
      - coltstudiogroup.com/scene/
      - daddycarl.com/scene/
      - hotoldermale.com/scene/
      - monstercub.com/scene/
      - naturalbornbreeders.com/scene/
      - older4me.com/scene/
      - raunchybastards.com/scene/
      - stockydudes.com/scene/
      - toplatindaddies.com/scene/
  # Sites handled by the "newStyleSite" scraper definition.
  - action: scrapeXPath
    scraper: newStyleSite
    url:
      - blackboyaddictionz.com/scene/
      - blacksondaddies.com/scene/
      - myfirstdaddy.com/scene/
      - playdaddy.com/scene/
|
|
# XPath scraper definitions referenced by the sceneByURL entries.
# Anchors declared in oldStyleSite (&ppDate, &image, &studioNameFromURL) are
# reused via aliases further down, so oldStyleSite must stay above newStyleSite.
xPathScrapers:
  oldStyleSite:
    common:
      # Root element that every scene selector below is relative to.
      $scene: //div[contains(@class, "sceneContainer")]
    scene:
      Title: $scene/div[@class="sceneTitle"]
      Code:
        selector: $scene//div[contains(@class, "sceneImgBig")]/@id
        postProcess:
          - replace:
              # Drop every non-digit character so only the digits of the id
              # remain. The pattern has no capture group, so the replacement
              # is spelled out as an explicit empty string.
              - regex: \D*
                with: ""
      Date:
        selector: $scene//span[contains(@class, "sceneDetails")]
        # Shared date handling: strip an optional "Details:" prefix, keep
        # "<Mon> <day>, <year>" and discard the rest, then parse it.
        postProcess: &ppDate
          - replace:
              # https://regex101.com/r/rsjbb6/3
              - regex: ^(?:Details:\s*)?(\w{3}\s*\d{1,2}),\s*(\d{4}).*?$
                with: $1, $2
          - parseDate: Jan 2, 2006
      # All of this can be replaced once scrapers get access to the URL they are scraping
      Studio:
        Name:
          # Union of the places an absolute image URL may appear on the page;
          # the host part of that URL is what identifies the studio.
          selector: &image >
            $scene//video/@poster
            | $scene//div[contains(@class, "sceneImgBig")]/img/@src
            | //div[contains(@style, "background-image")]/@style
            | //*[contains(@class, "videoTrailer") or contains(@class, "bgScene")]//@srcset
          # Shared studio-name handling: reduce the URL to the label right
          # before ".com", then translate that label to a display name.
          postProcess: &studioNameFromURL
            - replace:
                - regex: ^(?:https:\/\/[\w\.]*?)([^.]+)\.com.*$
                  with: $1
            - map:
                # Keys must equal the host label captured above
                # (e.g. "stockydudes" for stockydudes.com).
                blackboyaddictionz: Black Boy Addictionz
                blacksondaddies: Blacks on Daddies
                boundjocks: Bound Jocks
                boyshalfwayhouse: Boys Halfway House
                coltstudiogroup: Colt Studio Group
                daddycarl: Daddy Carl
                hotoldermale: Hot Older Male
                monstercub: Monster Cub
                myfirstdaddy: My First Daddy
                naturalbornbreeders: Natural Born Breeders
                older4me: Older4Me
                playdaddy: Play Daddy
                raunchybastards: Raunchy Bastards
                stockydudes: Stocky Dudes
                toplatindaddies: Top Latin Daddies
        URL:
          selector: *image
          postProcess:
            - replace:
                # Rebuild the site root from the captured host label.
                - regex: ^(?:https:\/\/[\w\.]*?)([^.]+)\.com.*$
                  with: https://$1.com
      Performers:
        Name: >
          $scene//div[contains(@class, "scenePerformers")]/a
          | $scene//div[@class="scenePerf"]/span[@class="perfName"]
        URL: >
          $scene//div[contains(@class, "scenePerformers")]/a/@href
          | $scene//div[@class="scenePerf"]/@data-href
      Tags:
        Name: $scene//a[@class="sceneTagsLnk"]/text()
      Details:
        selector: $scene//div[contains(@class, "sceneDescription")]/text()
        concat: "\n\n"
      Image:
        selector: *image
        postProcess:
          - replace:
              # Unwrap url("...") when the value came from a style attribute.
              - regex: .*url\("(.*)"\).*
                with: $1
              # Remove a trailing "2x" (and the whitespace before it), as
              # found at the end of a srcset value.
              - regex: \s*2x$
                with: ""

  newStyleSite:
    common:
      # Container that the performer, details, date and tag selectors are relative to.
      $details: //div[contains(@class, "container_styled_1")]
    scene:
      Title: //h2[@class="main_title"]
      Code:
        selector: //link[@rel="canonical"]/@href
        postProcess:
          - replace:
              # Keep only the digits of the canonical URL.
              # NOTE(review): digits elsewhere in the URL (e.g. in a slug)
              # would be kept as well - confirm against real pages.
              - regex: \D*
                with: ""
      # All of this can be replaced once scrapers get access to the URL they are scraping
      Studio:
        Name:
          selector: //link[@rel="canonical"]/@href
          postProcess: *studioNameFromURL
        URL:
          selector: //link[@rel="canonical"]/@href
          postProcess:
            - replace:
                # Rebuild the site root from the captured host label.
                - regex: ^(?:https:\/\/[\w\.]*?)([^.]+)\.com.*$
                  with: https://$1.com
      Performers:
        Name: $details//span[contains(@class, "perfImage")]/a
        URL: $details//span[contains(@class, "perfImage")]/a/@href
      Details:
        selector: $details//p/text()
        concat: "\n\n"
      Date:
        # First text node of the <h5> whose text contains "Details";
        # cleaned and parsed by the shared ppDate steps.
        selector: ($details//h5[contains(text(), "Details")]/text())[1]
        postProcess: *ppDate
      Image:
        selector: //meta[@property="og:image"]/@content
      Tags:
        Name: $details//h5[contains(., "Categories")]/a/text()
|
|
# Last Updated September 22, 2023
|