41 lines
1.1 KiB
YAML
41 lines
1.1 KiB
YAML
# Display name of this scraper definition.
name: GloryHoleSwallow
# Scrape a scene from its page URL: URLs matching an entry in `url` are
# processed with the `scrapeXPath` action using the `sceneScraper` scraper.
sceneByURL:
  - action: scrapeXPath
    url:
      - gloryholeswallow.com
    scraper: sceneScraper
# XPath scraper definitions. `sceneScraper` maps scene fields to XPath
# selectors evaluated against the scene page.
xPathScrapers:
  sceneScraper:
    common:
      # Shared fragment: `$info` in the selectors below expands to this path.
      $info: //div[@class='objectInfo']
    scene:
      Title: $info/h1
      # The date is taken from the same <h1> as the title and extracted by
      # the post-processing steps below, applied in order.
      Date:
        selector: $info/h1
        postProcess:
          - replace:
              # Try to match their current date format
              # (e.g. "Apr. 4, 2021" is rewritten to "Apr 4, 2021").
              - regex: '.*?(\w{3})\.\s*(\d{1,2}),\s*(\d{4}).*'
                with: '$1 $2, $3'
          # Layout string for the rewritten value (month abbreviation,
          # day, four-digit year).
          - parseDate: Jan 2, 2006
          - replace:
              # Strip out non-date text in case title does not follow the
              # expected format: only a leading YYYY-MM-DD is kept, anything
              # else becomes empty.
              - regex: '.*?(\d{4}-\d{2}-\d{2})?.*'
                with: '$1'
      Details:
        selector: $info/div[@class='content']/p/text()
        # Matched paragraphs are joined with a blank line between them.
        concat: "\n\n"
      Tags:
        Name: $info//p[contains(text(),'Tags')]//a/text()
      Image:
        selector: //div[@id='fakeplayer']//img/@src0_1x
        postProcess:
          - replace:
              # Prefix the attribute value with the site origin
              # (presumably the attribute holds a site-relative path).
              - regex: '^'
                with: https://gloryholeswallow.com
      Studio:
        Name:
          fixed: GloryHole Swallow
      # NOTE(review): indentation was lost in the source; URL is placed at
      # scene level because the canonical link identifies the scene page,
      # not the studio. Confirm against the upstream file.
      URL: //link[@rel='canonical']/@href
# Last Updated April 04, 2021