# Stash XPath scraper for MyDirtyHobby scene pages.
# Every field is extracted with regexes from the text of the inline <script>
# element that contains "profile_page".
name: MyDirtyHobby

sceneByURL:
  - action: scrapeXPath
    url:
      - mydirtyhobby.com/profil/
    scraper: sceneScraper

xPathScrapers:
  sceneScraper:
    common:
      # Text of the <script> element whose content mentions "profile_page".
      $script: //script[contains(text(),"profile_page")]/text()
    scene:
      Title:
        selector: $script
        postProcess:
          - replace:
              # Keep only the value of "title":{"text":"..."}.
              - regex: '.*"title":{"text":"([^"]+)"},"subtitle".*'
                with: $1
              # Turn literal \uXXXX escape text into the German letters.
              - regex: '\\u00df'
                with: "ß"
              - regex: '\\u00e4'
                with: "ä"
              - regex: '\\u00f6'
                with: "ö"
              - regex: '\\u00fc'
                with: "ü"
              - regex: '\\u00c4'
                with: "Ä"
              - regex: '\\u00d6'
                with: "Ö"
              - regex: '\\u00dc'
                with: "Ü"
      Details:
        selector: $script
        postProcess:
          - replace:
              # Keep only the value of "description":{"text":"..."}.
              - regex: '.*"description":{"text":"(.+)","moreText.*'
                with: $1
              # Decode HTML apostrophe entities to a plain apostrophe.
              # NOTE(review): this rule previously replaced a bare apostrophe
              # with itself (a no-op), presumably an entity that was decoded
              # in transit - confirm which entity the site actually emits.
              - regex: '&(?:#0?39|apos);'
                with: "'"
              # Anchors: replace <a ...>text<\/a> with just its text.
              # NOTE(review): the pattern previously started at "]+>" and
              # could not match an opening tag; the "<a[^>" prefix is a
              # reconstruction - confirm against a live page.
              - regex: '<a[^>]+>([^<]+)<\\/a>'
                with: "$1"
              # Turn literal \uXXXX escape text into the German letters.
              - regex: '\\u00df'
                with: "ß"
              - regex: '\\u00e4'
                with: "ä"
              - regex: '\\u00f6'
                with: "ö"
              - regex: '\\u00fc'
                with: "ü"
              - regex: '\\u00c4'
                with: "Ä"
              - regex: '\\u00d6'
                with: "Ö"
              - regex: '\\u00dc'
                with: "Ü"
      Date:
        selector: $script
        postProcess:
          - replace:
              # Keep only the value of "subtitle":{"text":"..."}.
              - regex: '.*"subtitle":{"text":"([^"]+)"},"rating".*'
                with: $1
          # Layout string for the extracted date, e.g. "02 Oct 2023".
          - parseDate: 02 Jan 2006
      Performers:
        Name:
          selector: $script
          postProcess:
            - replace:
                # Keep only the value of "thumbImg":{"title":"..."}.
                - regex: '.*"thumbImg":{"title":"([^"]+)".*'
                  with: $1
      Tags:
        Name:
          selector: $script
          postProcess:
            - replace:
                # Keep the contents of the "Categories" items array ...
                - regex: '.*"Categories","items":\[([^]]+)\].*'
                  with: $1
                # ... then reduce each {"text":..,"href":..} item to its text.
                - regex: '{"text":"([^"]+)","href":"[^"]+"}'
                  with: $1
          # The remaining comma-separated string becomes one tag per element.
          split: ","
      Image:
        selector: $script
        postProcess:
          - replace:
              # Keep the "src" value inside the first "thumbnail" object.
              - regex: '.*?"thumbnail":\s*{[^}]+"src":\s*"([^"]+)".*'
                with: $1
              # Replace escaped slashes (\/) with plain slashes.
              - regex: '\\/'
                with: "/"
      Studio:
        Name:
          fixed: My Dirty Hobby

driver:
  cookies:
    - CookieURL: "https://www.mydirtyhobby.com"
      Cookies:
        # Cookie named for the age gate; presumably pre-accepts it - confirm.
        - Name: "AGEGATEPASSED"
          Domain: ".mydirtyhobby.com"
          Value: "1"
          Path: "/"
# Last Updated October 02, 2023