stash
This commit is contained in:
@@ -0,0 +1,40 @@
|
||||
name: "GloryHoleSwallow"
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- gloryholeswallow.com
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
common:
|
||||
$info: //div[@class='objectInfo']
|
||||
scene:
|
||||
Title: $info/h1
|
||||
Date:
|
||||
selector: $info/h1
|
||||
postProcess:
|
||||
- replace:
|
||||
# Try to match their current date format
|
||||
- regex: .*?(\w{3})\.\s*(\d{1,2}),\s*(\d{4}).*
|
||||
with: $1 $2, $3
|
||||
- parseDate: Jan 2, 2006
|
||||
- replace:
|
||||
# Strip out non-date text in case title does not follow the expected format
|
||||
- regex: .*?(\d{4}-\d{2}-\d{2})?.*
|
||||
with: $1
|
||||
Details:
|
||||
selector: $info/div[@class='content']/p/text()
|
||||
concat: "\n\n"
|
||||
Tags:
|
||||
Name: $info//p[contains(text(),'Tags')]//a/text()
|
||||
Image:
|
||||
selector: //div[@id='fakeplayer']//img/@src0_1x
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ^
|
||||
with: https://gloryholeswallow.com
|
||||
Studio:
|
||||
Name:
|
||||
fixed: GloryHole Swallow
|
||||
URL: //link[@rel='canonical']/@href
|
||||
# Last Updated April 04, 2021
|
||||
Reference in New Issue
Block a user