stash
This commit is contained in:
134
stash/config/scrapers/community/data18/data18.yml
Normal file
134
stash/config/scrapers/community/data18/data18.yml
Normal file
@@ -0,0 +1,134 @@
|
||||
# Scraper definition for data18.com.
name: data18

# Movie lookups by URL: pages under data18.com/movies are handled by the
# `movieScraper` XPath scraper.
movieByURL:
  - action: scrapeXPath
    url:
      - data18.com/movies
    scraper: movieScraper
|
||||
# Scene lookups by URL. Two URL patterns are registered: scene pages go to
# `sceneScraper`, movie pages go to `movieScraper`.
sceneByURL:
  - action: scrapeXPath
    url:
      - data18.com/scenes
    scraper: sceneScraper
  # Many people have single-file movies and want to scrape them
  # as scenes instead of making a single-scene Movie object
  - action: scrapeXPath
    url:
      - data18.com/movies
    scraper: movieScraper
|
||||
|
||||
# XPath scraper definitions referenced by the `scraper:` keys of the
# movieByURL / sceneByURL entries.
xPathScrapers:
  # Used for data18.com/scenes pages.
  sceneScraper:
    # Reusable XPath fragments; referenced below as $performer, $studio, $movie.
    common:
      $performer: //div[h3[text()='Pornstars / Cast']]//a[@class='bold gen']
      $studio: //div[@id="body2div_b"]//a[contains(@href,"/studios/")]
      $movie: //b[text()="Movie:"]/following-sibling::a[1]
    scene:
      Title: //span/following-sibling::h1/a/text()
      Date:
        selector: //span[b[text()="Release date"]]
        postProcess:
          # Reduce "Release date: <Month>[ <DD>], <YYYY>..." to
          # "<Month>[ <DD>], <YYYY>"; the day part is optional.
          - replace:
              - regex: '^Release date: ([a-zA-Z]+)(\s*\d\d)?,\s*(\d{4}).+'
                with: "$1$2, $3"
          # Two layouts are tried in order: full date, then month and year only.
          - parseDate: January 02, 2006
          - parseDate: January, 2006
      Details:
        # Either the "Story" block or the text next to "Movie Description";
        # multiple matches are joined with a single space.
        selector: //div[b[text()="Story"]] | //b[contains(text(),"Movie Description")]/../text()
        concat: " "
        postProcess:
          # Remove the "Story - " label (replace with an empty string).
          - replace:
              - regex: "Story - "
                with: ""
      Tags:
        Name: //b[text()='Categories:']/following-sibling::a
      Performers:
        Name: $performer
        URL: $performer/@href
      Studio:
        Name: $studio
        URL: $studio/@href
      Movies:
        Name: $movie/text()
        URL: $movie/@href
      Image: //img[@id="playpriimage"]/@src

  # Used for data18.com/movies pages. Provides both a `movie` mapping and a
  # `scene` mapping, since the movies URL is registered under movieByURL and
  # sceneByURL alike.
  movieScraper:
    # Reusable XPath fragments; referenced below as $movieInfo, $studio,
    # $performer.
    common:
      $movieInfo: //div[@id="body2div_b"]
      $studio: //b[text()='Studio']/following-sibling::b/a
      $performer: //div[h3[contains(text(), 'Pornstars / Cast')]]//a[@class='bold gen']
    movie:
      Name:
        selector: //title
        postProcess:
          # Keep only the part of the page title that precedes
          # " (<YYYY>) Porn Movie | DATA18".
          - replace:
              - regex: '(.+?)(?:\s\(\d{4}\)\sPorn\sMovie\s\|\sDATA18)'
                with: $1
      Duration:
        selector: $movieInfo//b[contains(text(),"Length")]/following-sibling::span|$movieInfo//b[contains(text(),"Length")]/following-sibling::text()
        postProcess:
          - replace:
              # handle movies with proper [xx:xx:xx] duration
              - regex: '^\[(.+)\]$'
                with: $1
              # handle movies with only xx mins duration
              - regex: '^[^\d]*(\d+)\s*min.*'
                with: "$1:00"
      Date:
        selector: $movieInfo//span[contains(text(), "Release date")]/text()
        postProcess:
          # Remove the "Release date:" label (replace with an empty string).
          - replace:
              - regex: 'Release date:\s*'
                with: ""
          - parseDate: January, 2006
      Studio:
        Name: $studio/text()
        URL: $studio/@href
      Director: //p[b[contains(text(),'Director')]]//a[@class='bold']
      Synopsis:
        selector: //b[text()="Description"]/..
        concat: " "
        postProcess:
          # Remove the leading "Description -" label.
          - replace:
              - regex: '^Description\s*-\s*'
                with: ""
      FrontImage: //a[@id='enlargecover']/@data-featherlight
      BackImage: //a[text()='+Back']/@href
    # Scene view of a movie page; selectors mirror the `movie` mapping above.
    scene:
      Title:
        selector: //title
        postProcess:
          # Keep only the part of the page title that precedes
          # " (<YYYY>) Porn Movie | DATA18".
          - replace:
              - regex: '(.+?)(?:\s\(\d{4}\)\sPorn\sMovie\s\|\sDATA18)'
                with: $1
      Date:
        selector: $movieInfo//span[contains(text(), "Release date")]/text()
        postProcess:
          # Remove the "Release date:" label (replace with an empty string).
          - replace:
              - regex: 'Release date:\s*'
                with: ""
          - parseDate: January, 2006
      Studio:
        Name: $studio/text()
        URL: $studio/@href
      Director: //p[b[contains(text(),'Director')]]//a[@class='bold']
      Performers:
        Name: $performer
        URL: $performer/@href
      Details:
        selector: //b[text()="Description"]/..
        concat: " "
        postProcess:
          # Remove the leading "Description -" label.
          - replace:
              - regex: '^Description\s*-\s*'
                with: ""
      Image: //a[@id='enlargecover']/@data-featherlight
|
||||
# HTTP request settings applied when fetching pages for this scraper.
driver:
  # Cookie sent to https://data18.com: data_user_captcha=1 on .data18.com, path /.
  # NOTE(review): presumably marks the site's captcha as already passed - confirm.
  cookies:
    - CookieURL: "https://data18.com"
      Cookies:
        - Name: "data_user_captcha"
          Domain: ".data18.com"
          Value: "1"
          Path: "/"
  # Desktop Firefox 79 User-Agent. The original value ended with a stray,
  # unbalanced ")" which has been removed.
  headers:
    - Key: User-Agent
      Value: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0'
|
||||
# Last Updated July 18, 2023
|
||||
9
stash/config/scrapers/community/data18/manifest
Executable file
9
stash/config/scrapers/community/data18/manifest
Executable file
@@ -0,0 +1,9 @@
|
||||
# Package manifest for the data18 scraper: package id and display name,
# version and date, the source index it came from, and the files it ships.
id: data18
name: data18
metadata: {}
# Quoted so the short commit hash is always read as a string.
version: "fa7e333"
date: "2024-03-16 20:51:17"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
  - data18.yml
|
||||
Reference in New Issue
Block a user