# PKF Studios scene scraper definition (YAML).
name: PKF Studios

# Scene lookups by URL: URLs matching the entries under `url` are handled by
# the XPath scraper named in `scraper` (defined under `xPathScrapers`).
sceneByURL:
  - action: scrapeXPath
    url:
      - pkfstudios.com
    scraper: sceneScraper

xPathScrapers:
  sceneScraper:
    scene:
      Title: //h1[@class="entry-title"]/text()

      Details:
        # Description is a sequence of p elements containing ONLY text
        selector: //div[@class="entry-content"]/p[not(*)]/text()
        # Paragraphs are joined with a blank line between them.
        concat: "\n\n"
        # Remove the trailing "_ _ _ _ _" separator
        postProcess:
          - replace:
              - regex: "(\n\n[_ ]+)"
                with: ""

      Date:
        selector: //span[@class="entry-date"]//text()
        postProcess:
          # Layout is written as a sample date: full month name, day,
          # four-digit year (e.g. "December 5, 2023").
          - parseDate: January 2, 2006

      Image:
        # Images are loaded with javascript, we'll just grab the last image
        # from the srcset because it's usually the largest
        selector: //div[contains(@class, "post-thumbnail")]/img/@data-lazy-srcset
        postProcess:
          - replace:
              # Keep only the last "https://... <N>w" candidate and drop its
              # width descriptor. The leading part is optional so a srcset
              # with a single candidate is also reduced to a bare URL.
              - regex: '^(?:.*\s+)?(https://.*)\s+\d+w$'
                with: $1

      Studio:
        Name:
          fixed: "PKF Studios"

      Tags:
        Name:
          # First selector will match when the tags are outside of the
          # <strong> tag, second when they are inside it.
          selector: >-
            //div[@class="entry-content"]//strong[starts-with(text(), "Role")]/following-sibling::text()
            | //div[@class="entry-content"]//strong[starts-with(text(), "Role")]/text()
          postProcess:
            - replace:
                # Drop an optional "<label>: " prefix and keep the text up to
                # the first period.
                - regex: '(?:.*:\s+)?([^.]*).?'
                  with: $1
          split: ", "

      Performers:
        Name:
          # Sometimes the performers are listed in a separate tag, sometimes
          # they're in a paragraph mixed in with the description
          selector: >-
            //div[@class="entry-content"]//*[contains(text(), "Starring")]/text()
            | //p[contains(., "Starring")]
          postProcess:
            - replace:
                # Keep only what follows "Starring ".
                - regex: ".*Starring (.*)"
                  with: $1
                # Strip the director credit (case-insensitive).
                - regex: "(?i)directed by johnm"
                  with: ""
                # Normalise ", " and ", and " to " and " so one split handles
                # both. The space after "and" is consumed too, so the next
                # name has no leading space and a name that merely starts
                # with "and" is left intact.
                - regex: ", (?:and )?"
                  with: " and "
          split: " and "
# Last Updated December 05, 2023