# Stash scraper definition for hegre.com.
# Scenes, galleries and performers are all scraped with XPath from the public
# pages; the two search scrapers (sceneSearch / performerSearch) read the
# site's search result listing.
name: Hegre

sceneByURL:
  - action: scrapeXPath
    url:
      - hegre.com/movies
      - hegre.com/films
      - hegre.com/massage
      - hegre.com/sexed
    scraper: sceneScraper

sceneByName:
  action: scrapeXPath
  queryURL: "https://www.hegre.com/?types=Film&q={}"
  scraper: sceneSearch

# Resolves a search hit by loading the URL that sceneSearch returned.
sceneByQueryFragment:
  action: scrapeXPath
  queryURL: "{url}"
  scraper: sceneScraper

galleryByURL:
  - action: scrapeXPath
    url:
      - hegre.com
    scraper: galleryScraper

performerByURL:
  - action: scrapeXPath
    url:
      - hegre.com
    scraper: performerScraper

performerByName:
  action: scrapeXPath
  queryURL: "https://www.hegre.com/?q={}&types=Model"
  scraper: performerSearch

xPathScrapers:
  sceneSearch:
    scene:
      Title:
        selector: //a[@class='open-in-content-overlay']/@title
      Date:
        selector: //a[@class='open-in-content-overlay']//div[@class='details']/span[last()]/text()
        postProcess:
          # Drop the ordinal suffix ("1st", "2nd", "3rd", "4th") so the value
          # fits the parseDate layout. The suffix is only removed when it
          # directly follows a digit, so letters inside a month name
          # (e.g. the "st" in "August") are never touched.
          - replace:
              - regex: (\d)(st|nd|rd|th)
                with: $1
          - parseDate: Jan 2, 2006
      URL:
        selector: //a[@class='open-in-content-overlay']/@href
        postProcess:
          # Prefix the site origin onto the scraped href.
          - replace:
              - regex: ^
                with: "https://www.hegre.com"
      Image:
        selector: //a[@class='open-in-content-overlay']//div[@class='img-holder']//img/@data-src

  sceneScraper:
    common:
      $actor: //a[@class="record-model"]
    scene:
      Title: //meta[@property="og:title"]/@content
      URL: //link[@hreflang="x-default"]/@href
      Details:
        # Union of three possible description locations; every matching text
        # node is joined with a blank line between them.
        selector: |
          //div[contains(@class,"film-subtitle")]/text()
          |//div[@class="record-details-column"]//div[contains(@class, "massage-")]//text()
          |//div[@class="film-header"]/div[@class="intro"]//text()
        concat: "\n\n"
      Date:
        selector: //span[@class="date"]/text()
        postProcess:
          - parseDate: January 2, 2006
      Image: //meta[@property="og:image"]/@content
      Studio:
        Name:
          fixed: Hegre
      Tags:
        Name: //div[@class="approved-tags"]/a
      Performers:
        Name: $actor/@title
        URL:
          # $actor is the <a> element itself, so the href is read from it
          # directly (same element Name reads @title from). An <a> cannot
          # contain another <a>, so "$actor//a" can never match anything.
          selector: $actor/@href
          postProcess:
            # Same origin as the URLs produced by performerSearch, so a
            # performer gets one consistent URL from either path.
            - replace:
                - regex: ^
                  with: https://www.hegre.com

  galleryScraper:
    common:
      $actor: //a[@class="record-model"]
    gallery:
      Title: //div[@class="record-toolbar clearfix"]//h1
      Date:
        selector: //span[@class="date"]/text()
        postProcess:
          - parseDate: January 2, 2006
      Studio:
        Name:
          fixed: Hegre
      Tags:
        Name: //div[@class="approved-tags"]/a
      Performers:
        Name: $actor/@title
        URL:
          # $actor is the <a> element itself; read its own href rather than
          # looking for a nested <a>, which cannot exist.
          selector: $actor/@href
          postProcess:
            # Same origin as the URLs produced by performerSearch.
            - replace:
                - regex: ^
                  with: https://www.hegre.com

  performerSearch:
    performer:
      # Both selectors skip any "search-results" block that is preceded by the
      # "no-results" heading.
      Name: //div[@class="search-results"][not(preceding::h2[@class="no-results"])]//a/@title
      URL:
        selector: //div[@class="search-results"][not(preceding::h2[@class="no-results"])]//div[@id="search-results-wrapper"]//a/@href
        postProcess:
          - replace:
              - regex: ^
                with: https://www.hegre.com

  performerScraper:
    # Not much, but better than nothing, since most of the performers just have forenames
    performer:
      Name: //span[@class="label"][contains(text(),"Name")]/following-sibling::span/text()
      Gender:
        fixed: Female
      Birthdate:
        selector: //span[@class="label"][contains(text(),"Born")]/following-sibling::span
        postProcess:
          # Prepend a day-of-month so the value fits the
          # "02 January, 2006" layout used by parseDate below.
          - replace:
              - regex: ^
                with: "01 "
          - parseDate: 02 January, 2006
      Country:
        selector: //span[@class="label"][contains(text(),"Country")]/following-sibling::span/text()
      Height:
        selector: //span[@class="label"][contains(text(),"Height")]/following-sibling::span/text()
        postProcess:
          # Strip the unit so only the number remains.
          - replace:
              - regex: cm
                with: ""
      Image:
        selector: //div[@class="details"]//img/@src
        # Newer models allow for 1440x
        postProcess:
          # Rewrite the three-digit width token in the image URL to 640x.
          - replace:
              - regex: \d{3}x
                with: 640x
      Details:
        selector: //div[@class="description translated-text"]//child::text()
        concat: "\n\n"
# Last Updated November 03, 2022