Files
compose-projects-arr/stash/config/scrapers/community/PegasProductions/PegasProductions.yml
Christoph Califice 0a5f88d75a stash
2025-10-10 09:50:30 -03:00

120 lines
3.6 KiB
YAML

name: PegasProductions
# Scene pages on this site exist under two URL styles. Paths that begin with '/abonnements' expose
# less metadata; every other path exposes more. A scene appears to be reachable through both styles,
# but nothing links the resource in one style to its counterpart in the other, so each style gets
# its own scraper.
sceneByURL:
  # NOTE(review): the '/abonnements' pattern is also covered by the bare-domain pattern further
  # down; presumably entries are tried in order, so keep this one first - confirm.
  - action: scrapeXPath
    url:
      - 'pegasproductions.com/abonnements'
    scraper: abonnementsScraper
  # Everything else on the domain uses the richer page layout.
  - action: scrapeXPath
    url:
      - 'pegasproductions.com'
    scraper: sceneScraper
# XPath scraper definitions, referenced by name from the sceneByURL entries.
xPathScrapers:
  # Handles the '/abonnements' page layout, reading values from <meta itemprop="..."> tags.
  abonnementsScraper:
    scene:
      Title: '//meta[@itemprop="name"]/@content'
      Date:
        selector: '//meta[@itemprop="uploadDate"]/@content'
        postProcess:
          - replace:
              # Keep only the text before the first 'T' (presumably the date half of an
              # ISO-style timestamp - confirm against a live page).
              - regex: '^([^T]+).+'
                with: '$1'
      Performers:
        Name:
          selector: '//h1'
          postProcess:
            - replace:
                # Keep the trailing run of non-':' characters, i.e. the text after the last ':'.
                - regex: '^.+?([^:]*)$'
                  with: '$1'
                # Drop every character that is not an ASCII letter, digit, or whitespace.
                - regex: '[^a-zA-Z0-9\s]'
                  with: ''
      Image: '//meta[@itemprop="thumbnailUrl"]/@content'
      Studio:
        Name:
          fixed: Pegas Productions
  # Handles every other page layout on the domain.
  sceneScraper:
    scene:
      Title:
        selector: '//span[@itemprop="name"]'
      # This layout is served in English and French. The labels matched for Director and
      # Performers are therefore listed in both languages.
      Director: '//p[contains(b,"Director") or contains(b,"Réalisateur")]/text()'
      Performers:
        Name: '//div[@class="span10"]/p[contains(b,"Starring") or contains(b,"Distribution")]/a'
      Date:
        selector: '//div[@id="date-duree"]/div[1]/p[1]'
        # English pages show dates as day/month/year digits (layout 02/01/2006).
        # French pages show dates as "01 janvier 2006", with the full French month name.
        # The replacements below turn the French form into the English form: each month name
        # becomes its two-digit number, then whitespace becomes '/'.
        postProcess:
          - replace:
              - regex: 'janvier'
                with: '01'
              - regex: 'f[eé]vrier'
                with: '02'
              - regex: 'mars'
                with: '03'
              - regex: 'avril'
                with: '04'
              - regex: 'mai'
                with: '05'
              - regex: 'juin'
                with: '06'
              - regex: 'juillet'
                with: '07'
              # Accept both the accented and unaccented spelling, like février/décembre do.
              - regex: 'ao[uû]t'
                with: '08'
              - regex: 'septembre'
                with: '09'
              - regex: 'octobre'
                with: '10'
              - regex: 'novembre'
                with: '11'
              - regex: 'd[eé]cembre'
                with: '12'
              # Must run after the month rules: joins "01 01 2006" into "01/01/2006".
              - regex: '\s'
                with: '/'
          - parseDate: '02/01/2006'
      Tags:
        Name:
          selector: '//div[@class="span10"]/p[contains(b,"Tags")]'
          postProcess:
            - replace:
                # Remove the "Tags:" label; the replacement is an explicit empty string.
                - regex: 'Tags:'
                  with: ''
          # Attribute-level option (sibling of selector): break the remaining text into one
          # tag per ", "-separated item.
          split: ', '
      Details:
        selector: '//h5[@itemprop="description"]/p'
      Image:
        selector: '//script[contains(text(),"flowplayer(container")]/text()'
        postProcess:
          - replace:
              # Reduce the script text to the first http(s) URL ending in ".jpg".
              - regex: '.*?(https?:\/\/\S*\.jpg).*'
                with: '$1'
      Studio:
        Name:
          fixed: Pegas Productions
# HTTP settings applied to the requests made by the scrapers in this file.
driver:
  headers:
    # Firefox 79 on 64-bit Windows 10. The value must not end with ')': the only parenthesised
    # part of a Firefox User-Agent is the platform section.
    - Key: User-Agent
      Value: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0'
  cookies:
    - CookieURL: 'https://www.pegasproductions.com/'
      Cookies:
        # NOTE(review): presumably selects the English version of the site, so that the English
        # labels and numeric dates are served - confirm against the live site.
        - Name: 'langue'
          Domain: '.pegasproductions.com'
          Value: 'en'
          Path: '/'
# Last Updated March 29, 2024