# Source listing metadata: 86 lines, 2.3 KiB, YAML
# Scraper definition for the VNAGirls family of sites.
# NOTE(review): nesting below was reconstructed from key order, list markers and
# the &pp / *pp anchor pair after the original indentation was lost; the layout
# appears to follow the Stash XPath-scraper schema - confirm against the loader.
name: VNAGirls

# Scene lookup by URL: every host listed here is routed to `sceneScraper`.
sceneByURL:
  - action: scrapeXPath
    url:
      - alexlegend.com
      - angelinacastrolive.com
      - bestoftealconrad.com
      - blownbyrone.com
      - bobbiedenlive.com
      - carmenvalentina.com
      - charleechaselive.com
      - deauxmalive.com
      - foxxedup.com
      - fuckedfeet.com
      - girlgirlmania.com
      - itscleolive.com
      - jelenajensen.com
      - juliaannlive.com
      - kendrajames.com
      - kimberleelive.com
      - kink305.com
      - maggiegreenlive.com
      - maxinex.com
      - nataliastarr.com
      - nikkibenz.com
      - ninakayy.com
      - pennypaxlive.com
      - povmania.com
      - pumaswedexxx.com
      - rachelstormsxxx.com
      - romemajor.com
      - rubberdoll.net
      - samanthagrace.net
      - sarajay.com
      - sexmywife.com
      - shandafay.com
      - siripornstar.com
      - sophiedeelive.com
      - sunnylanelive.com
      - vickyathome.com
    scraper: sceneScraper

xPathScrapers:
  sceneScraper:
    scene:
      Performers:
        Name:
          # Performer names come from the <h3> text as one comma-separated string.
          selector: //div[@class='customcontent']/h3/text()
          # Anchored as `pp` so the Tags field can reuse the same clean-up steps.
          postProcess: &pp
            - replace:
                # Map both space variants to a plain space.
                - regex: \x{0020}|\x{00A0} # unicode SP, NBSP
                  with: " "
                # Drop whitespace around commas so the split yields trimmed items.
                - regex: '\s*,\s*'
                  with: ","
          split: ","
      Title:
        selector: //div[@class='customcontent']/h1/text()
      Details:
        # Union of two locations: the first <div> text and the <h2> text.
        selector: //div[@class='customcontent']/div[1]/text()|//div[@class='customcontent']/h2/text()
      Tags:
        Name:
          selector: //div[@class='customcontent']/h4/text()
          # Alias of the Performers clean-up list defined at `&pp`.
          postProcess: *pp
          split: ","
      Studio:
        Name:
          selector: //p[@class="copyright"]
          postProcess:
            - replace:
                # Keep only the word immediately before ".com" (case-insensitive).
                # NOTE(review): only ".com" is matched, yet the url list includes
                # rubberdoll.net and samanthagrace.net - confirm what the
                # copyright line contains on those sites.
                - regex: .*\s(\w+)\.(?i)com\s.+
                  with: $1
      Image:
        # Copyright paragraph and the <center> image src are selected together
        # with "|" as the concat separator; the regex below relies on that "|".
        selector: //p[@class="copyright"]|//center/img/@src
        concat: "|"
        postProcess:
          - replace:
              # $1 = "<name>.com" taken from the copyright text, $2 = everything
              # after the "|" (the image src); combined into an absolute URL.
              # NOTE(review): same ".com"-only limitation as the Studio regex.
              - regex: .*\s(\w+\.(?i)com)\s.+\|(.+)
                with: https://www.$1/$2
      Date:
        selector: //div[@class='date-and-covers']/div[@class='date']/text()
        postProcess:
          - replace:
              # Strip the ordinal suffix after the day number (1st -> 1, 22nd -> 22).
              - regex: (\d+)(st|nd|rd|th)
                with: "$1"
          # Layout string; presumably a Go reference-time layout - TODO confirm.
          - parseDate: January 2 2006
# Last Updated March 25, 2022