---
# Name of this scraper definition.
name: hussiemodels
# Scene pages: URLs matching one of the patterns below are scraped with the
# XPath scraper `hussieScraper` (defined under `xPathScrapers`).
sceneByURL:
  - action: scrapeXPath
    scraper: hussieScraper
    url:
      - hookuphotshot.com/trailers/
      - hotandtatted.com/trailers/
      - hussiepass.com/trailers/
      - seehimfuck.com/trailers/
      - seehimsolo.com/trailers/
      # NOTE(review): unlike the entries above, this pattern has no
      # /trailers/ path segment - presumably intentional; confirm.
      - povpornstars.com
# Movie (DVD) pages: URLs matching one of the patterns below are scraped with
# the XPath scraper `hussieScraper` (defined under `xPathScrapers`).
movieByURL:
  - action: scrapeXPath
    scraper: hussieScraper
    url:
      - hussiepass.com/dvds/
      - seehimfuck.com/dvds/
      - seehimsolo.com/dvds/
# XPath scraper shared by the scene and movie URL matchers above.
# The anchors &dvdSynopsis, &dvdCover and &studio are defined in the `movie`
# section and reused via aliases in the `scene` section, so `movie` must stay
# ahead of `scene` in this file.
xPathScrapers:
  hussieScraper:
    common:
      # Thumbnail <img> element; only referenced by the commented-out
      # high-resolution Image selector at the end of the `scene` section.
      $image: //img[contains(@class, "update_thumb") or contains(@class, "trailer-thumb")]
    movie:
      Name: //div[contains(@class, "profile-details")]/h3
      Synopsis: &dvdSynopsis //div[contains(@class, "profile-about")]/p/text()
      # Cover image: select the page's <base href> plus every available
      # src0_Nx attribute of the dvd_box image, join them with __SEPARATOR__,
      # then keep only the first two pieces (base URL + first image path).
      FrontImage: &dvdCover
        selector: >-
          //head/base/@href |
          //img[contains(@class, "dvd_box")]/@src0_4x |
          //img[contains(@class, "dvd_box")]/@src0_3x |
          //img[contains(@class, "dvd_box")]/@src0_2x |
          //img[contains(@class, "dvd_box")]/@src0_1x
        concat: __SEPARATOR__
        postProcess:
          - replace:
              # The trailing group is optional so that a page exposing only
              # one src0_Nx attribute (i.e. a single separator in the joined
              # string) is still collapsed to base URL + image path instead
              # of being left with a literal __SEPARATOR__ in the result.
              - regex: ^(.*?)__SEPARATOR__(.*?)(?:__SEPARATOR__.*)?$
                with: $1$2
      # Studio: take the domain label directly before ".com" from the
      # <base href> and map it to the studio's display name.
      Studio: &studio
        Name:
          selector: //head/base/@href
          postProcess:
            - replace:
                - regex: ^https://(?:[^.]+\.)?([^.]+)\.com/.*
                  with: $1
            - map:
                hookuphotshot: HookUp Hotshot
                hotandtatted: Hot and Tatted
                hussiepass: Hussie Pass
                seehimfuck: See Him Fuck
                seehimsolo: See Him Solo
                povpornstars: POV Pornstars
    scene:
      # Any child of the videoDetails div whose element name starts with "h".
      Title: //div[contains(@class, "videoDetails")]/*[starts-with(name(), 'h')]
      Date:
        selector: //div[contains(@class, "videoInfo")]/p[contains(span, "Date Added:")]/text()
        postProcess:
          # Only HookupHotshot still uses this format but the others use YYYY-MM-DD
          # so they'll fall through and still be correct wrt. expected Stash date format
          - parseDate: January 2, 2006
      Studio: *studio
      Performers:
        Name: //li[contains(@class, "update_models")]//a
      Tags:
        Name: //div[contains(@class, "featuring")]//a[contains(@href, "/categories/")]
      Movies:
        Name: //div[contains(@class, "featuring")]//a[contains(@href, "/dvds/")]
        URL: //div[contains(@class, "featuring")]//a[contains(@href, "/dvds/")]/@href
        # Cover and synopsis are taken from the linked DVD page: the link's
        # href is handed to a sub-scraper that reuses the movie selectors.
        FrontImage:
          selector: //div[contains(@class, "featuring")]//a[contains(@href, "/dvds/")]/@href
          postProcess:
            - subScraper: *dvdCover
        Synopsis:
          selector: //div[contains(@class, "featuring")]//a[contains(@href, "/dvds/")]/@href
          postProcess:
            - subScraper: *dvdSynopsis
        # this fails but I think it's a bug in Stash related to scraping more than one level of nesting
        # Studio: *studio
      Details:
        selector: //div[contains(@class, "videoDetails")]/p
        postProcess:
          - replace:
              # Where the <br> elements used to be
              - regex: ([!.?])([^!.?\s])
                with: "$1\n\n$2"
      Image:
        selector: //meta[@property="og:image"]/@content
        postProcess:
          # Rewrite the "1x" marker in the og:image URL to "4x".
          - replace:
              - regex: 1x
                with: 4x
        # Some people prefer the higher res scene preview images
        # selector: >-
        #   $image/@src0_4x |
        #   $image/@src0_3x |
        #   $image/@src0_2x |
        #   $image/@src0_1x
# Last Updated January 10, 2024