# Files
# compose-projects-arr/stash/config/scrapers/community/PKFStudios/PKFStudios.yml
# Christoph Califice 0a5f88d75a stash
# 2025-10-10 09:50:30 -03:00

# 58 lines
# 2.1 KiB
# YAML

# Name of this scraper definition.
name: PKF Studios
# Scene URLs listed under `url` are handled by the XPath scraper named in
# `scraper`, which must match a key under `xPathScrapers`.
sceneByURL:
  - action: scrapeXPath
    url:
      - pkfstudios.com
    scraper: sceneScraper
# XPath scraper definitions; `sceneScraper` maps scene fields to selectors
# evaluated against the scene page.
xPathScrapers:
  sceneScraper:
    scene:
      Title: //h1[@class="entry-title"]/text()
      Details:
        # Description is a sequence of p elements containing ONLY text
        selector: //div[@class="entry-content"]/p[not(*)]/text()
        concat: "\n\n"
        # Remove the trailing "_ _ _ _ _" separator
        postProcess:
          - replace:
              - regex: "(\n\n[_ ]+)"
                with: ""
      Date:
        selector: //span[@class="entry-date"]//text()
        postProcess:
          # Layout: full month name, day, four-digit year
          - parseDate: January 2, 2006
      Image:
        # Images are loaded with javascript, we'll just grab the last image
        # from the srcset because it's usually the largest
        selector: //div[contains(@class, "post-thumbnail")]/img/@data-lazy-srcset
        postProcess:
          - replace:
              # Keep only the final "https://..." URL that precedes the last
              # "<width>w" descriptor.
              - regex: '^.*\s+(https://.*)\s+\d+w$'
                with: '$1'
      Studio:
        Name:
          fixed: "PKF Studios"
      Tags:
        Name:
          # First selector will match when the tags are outside of the <strong> tag
          selector: >-
            //div[@class="entry-content"]//strong[starts-with(text(), "Role")]/following-sibling::text() |
            //div[@class="entry-content"]//strong[starts-with(text(), "Role")]/text()
          postProcess:
            - replace:
                # Strip an optional leading "label: " prefix and the period
                # that ends the tag list, leaving "tag, tag, tag".
                - regex: '(?:.*:\s+)?([^.]*).?'
                  with: '$1'
          split: ", "
      Performers:
        Name:
          # Sometimes the performers are listed in a separate tag, sometimes they're in a paragraph mixed in with the description
          selector: >-
            //div[@class="entry-content"]//*[contains(text(), "Starring")]/text() |
            //p[contains(., "Starring")]
          postProcess:
            - replace:
                # Keep only what follows "Starring ".
                - regex: ".*Starring (.*)"
                  with: '$1'
                - regex: "(?i)directed by johnm"
                  with: ""
                # Turn every list separator (", " or ", and ") into " and " so a
                # single split handles "A, B and C" as well as "A, B, and C".
                # The space after "and" is consumed too, so the next name gets no
                # leading space and a name that merely starts with "and" is
                # left intact.
                - regex: ", (?:and )?"
                  with: " and "
          split: " and "
# Last Updated December 05, 2023