name: PegasProductions
# This studio has two formats for scenes depending on URL. The less useful format has '/abonnements'
# at the start of the path while the more useful one does not. Scenes appear to be available in both
# formats, but there is no map between the resources referenced in each style of URL.
sceneByURL:
  # The more specific '/abonnements' pattern is listed before the bare domain pattern, which
  # would otherwise also cover those URLs.
  - action: scrapeXPath
    url:
      - pegasproductions.com/abonnements
    scraper: abonnementsScraper
  - action: scrapeXPath
    url:
      - pegasproductions.com
    scraper: sceneScraper

xPathScrapers:
  # Scraper for '/abonnements' URLs: reads everything it can from itemprop <meta> tags and the <h1>.
  abonnementsScraper:
    scene:
      Title: //meta[@itemprop="name"]/@content
      Date:
        selector: //meta[@itemprop="uploadDate"]/@content
        postProcess:
          # Keep only the text before the first "T" (presumably the date part of an
          # ISO 8601 timestamp - confirm against the live page).
          - replace:
              - regex: ^([^T]+).+
                with: $1
      Performers:
        Name:
          selector: //h1
          postProcess:
            - replace:
                # Keep only the text after the last ":" in the heading.
                - regex: ^.+?([^:]*)$
                  with: $1
                # Drop every character that is not an ASCII letter, a digit or whitespace.
                - regex: '[^a-zA-Z0-9\s]'
                  with: ''
      Image: //meta[@itemprop="thumbnailUrl"]/@content
      Studio:
        Name:
          fixed: Pegas Productions

  # Scraper for every other pegasproductions.com scene URL.
  sceneScraper:
    scene:
      Title:
        selector: //span[@itemprop="name"]
      # This format has options for English and French language versions of the scene. Depending on
      # the language, the values we key off for Performers and Director will be in English or French.
      Director: //p[contains(b,"Director") or contains(b,"Réalisateur")]/text()
      Performers:
        Name: //div[@class="span10"]/p[contains(b,"Starring") or contains(b,"Distribution")]/a
      Date:
        selector: //div[@id="date-duree"]/div[1]/p[1]
        # English URLs display dates in the format of 02/01/2006.
        # French URLs display dates in the format of 01 janvier 2006, using the full French name of the month.
        # This bit of hackery converts the dates used in the French version to the format used on the English page.
        postProcess:
          - replace:
              - regex: janvier
                with: "01"
              - regex: f[eé]vrier
                with: "02"
              - regex: mars
                with: "03"
              - regex: avril
                with: "04"
              - regex: mai
                with: "05"
              - regex: juin
                with: "06"
              - regex: juillet
                with: "07"
              # Accept both "aout" and the accented "août", like the other accented months.
              - regex: ao[uû]t
                with: "08"
              - regex: septembre
                with: "09"
              - regex: octobre
                with: "10"
              - regex: novembre
                with: "11"
              - regex: d[eé]cembre
                with: "12"
              # Turn the whitespace separators into "/" so the value matches the parseDate layout.
              - regex: \s
                with: /
          - parseDate: 02/01/2006
      Tags:
        Name:
          selector: //div[@class="span10"]/p[contains(b,"Tags")]
          postProcess:
            - replace:
                # Remove the "Tags:" label; the explicit empty string means "replace with nothing".
                - regex: "Tags:"
                  with: ""
          # Break the remaining comma-separated text into one tag per entry.
          split: ", "
      Details:
        selector: //h5[@itemprop="description"]/p
      Image:
        selector: //script[contains(text(),"flowplayer(container")]/text()
        postProcess:
          # Pull the .jpg URL out of the player setup script text.
          - replace:
              - regex: .*?(https?:\/\/\S*\.jpg).*
                with: $1
      Studio:
        Name:
          fixed: Pegas Productions

driver:
  headers:
    # Send a desktop Firefox User-Agent string with the requests.
    - Key: User-Agent
      Value: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0
  cookies:
    # Set the "langue" cookie to "en" (presumably selects the English version of the site,
    # whose labels and date format the sceneScraper keys off first - confirm).
    - CookieURL: "https://www.pegasproductions.com/"
      Cookies:
        - Name: "langue"
          Domain: ".pegasproductions.com"
          Value: "en"
          Path: "/"
# Last Updated March 29, 2024