# MyDirtyHobby scraper definition (YAML; source listing reported 102 lines, 2.7 KiB)
name: MyDirtyHobby

# Scene lookup by URL: URLs matching the fragment listed under `url` are
# handled by the XPath scraper registered as `sceneScraper`.
sceneByURL:
  - action: scrapeXPath
    scraper: sceneScraper
    url:
      - mydirtyhobby.com/profil/
# XPath scraper definitions. Every scene field is taken from the same inline
# <script> text (the one containing "profile_page"); a leading regex isolates
# the wanted JSON value and the following regexes decode escapes left in it.
xPathScrapers:
  sceneScraper:
    common:
      # Shared selector: text of the <script> element that mentions "profile_page".
      $script: //script[contains(text(),"profile_page")]/text()
    scene:
      Title:
        selector: $script
        postProcess:
          - replace:
              # Keep only the title text that sits between "title" and "subtitle".
              - regex: '.*"title":{"text":"([^"]+)"},"subtitle".*'
                with: $1
              # Decode the JSON \uXXXX escapes for German characters.
              - regex: '\\u00df'
                with: "ß"
              - regex: '\\u00e4'
                with: "ä"
              - regex: '\\u00f6'
                with: "ö"
              - regex: '\\u00fc'
                with: "ü"
              - regex: '\\u00c4'
                with: "Ä"
              - regex: '\\u00d6'
                with: "Ö"
              - regex: '\\u00dc'
                with: "Ü"

      Details:
        selector: $script
        postProcess:
          - replace:
              # Keep only the description text that precedes "moreText".
              - regex: '.*"description":{"text":"(.+)","moreText.*'
                with: $1
              # Decode HTML apostrophe entities. The previous pattern was a bare
              # apostrophe replaced by an apostrophe (a no-op).
              # NOTE(review): presumably the page emits &#039; - confirm against a live page.
              - regex: '&(?:#0*39|#[xX]0*27|apos);'
                with: "'"
              # Strip JSON-escaped anchor tags, keeping only the link text.
              - regex: '<a[^>]+>([^<]+)<\\/a>'
                with: "$1"
              # Decode the JSON \uXXXX escapes for German characters.
              - regex: '\\u00df'
                with: "ß"
              - regex: '\\u00e4'
                with: "ä"
              - regex: '\\u00f6'
                with: "ö"
              - regex: '\\u00fc'
                with: "ü"
              - regex: '\\u00c4'
                with: "Ä"
              - regex: '\\u00d6'
                with: "Ö"
              - regex: '\\u00dc'
                with: "Ü"
      Date:
        selector: $script
        postProcess:
          - replace:
              # The subtitle text (between "subtitle" and "rating") holds the date.
              - regex: '.*"subtitle":{"text":"([^"]+)"},"rating".*'
                with: $1
          # Go reference-time layout: day, abbreviated month name, 4-digit year.
          - parseDate: 02 Jan 2006
      Performers:
        Name:
          selector: $script
          postProcess:
            - replace:
                # Performer name is read from the thumbnail image title.
                - regex: '.*"thumbImg":{"title":"([^"]+)".*'
                  with: $1
      Tags:
        Name:
          selector: $script
          postProcess:
            - replace:
                # Isolate the contents of the "Categories" items array ...
                - regex: '.*"Categories","items":\[([^]]+)\].*'
                  with: $1
                # ... then reduce each {"text":...,"href":...} object to its text.
                - regex: '{"text":"([^"]+)","href":"[^"]+"}'
                  with: $1
          # The remaining comma-separated list is split into individual tags.
          split: ","
      Image:
        selector: $script
        postProcess:
          - replace:
              # First "src" found inside the "thumbnail" object.
              - regex: '.*?"thumbnail":\s*{[^}]+"src":\s*"([^"]+)".*'
                with: $1
              # Undo JSON escaping of forward slashes in the URL.
              - regex: '\\/'
                with: "/"
      Studio:
        Name:
          fixed: My Dirty Hobby

# Request driver settings: one cookie (AGEGATEPASSED=1) is pre-set for the
# site's domain - presumably so the age-verification gate is skipped.
driver:
  cookies:
    - CookieURL: 'https://www.mydirtyhobby.com'
      Cookies:
        - Name: 'AGEGATEPASSED'
          Domain: '.mydirtyhobby.com'
          Value: '1'
          Path: '/'
# Last Updated October 02, 2023