This commit is contained in:
Christoph Califice
2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions

View File

@@ -0,0 +1,152 @@
name: Hegre
sceneByURL:
- action: scrapeXPath
url:
- hegre.com/movies
- hegre.com/films
- hegre.com/massage
- hegre.com/sexed
scraper: sceneScraper
sceneByName:
action: scrapeXPath
queryURL: "https://www.hegre.com/?types=Film&q={}"
scraper: sceneSearch
sceneByQueryFragment:
action: scrapeXPath
queryURL: "{url}"
scraper: sceneScraper
galleryByURL:
- action: scrapeXPath
url:
- hegre.com
scraper: galleryScraper
performerByURL:
- action: scrapeXPath
url:
- hegre.com
scraper: performerScraper
performerByName:
action: scrapeXPath
queryURL: https://www.hegre.com/?q={}&types=Model
scraper: performerSearch
xPathScrapers:
sceneSearch:
scene:
Title:
selector: //a[@class='open-in-content-overlay']/@title
Date:
selector: //a[@class='open-in-content-overlay']//div[@class='details']/span[last()]/text()
postProcess:
- replace:
- regex: (st|nd|rd|th)
with: ""
- parseDate: Jan 2, 2006
URL:
selector: //a[@class='open-in-content-overlay']/@href
postProcess:
- replace:
- regex: ^
with: "https://www.hegre.com"
Image:
selector: //a[@class='open-in-content-overlay']//div[@class='img-holder']//img/@data-src
sceneScraper:
common:
$actor: //a[@class="record-model"]
scene:
Title: //meta[@property="og:title"]/@content
URL: //link[@hreflang="x-default"]/@href
Details:
selector: |
//div[contains(@class,"film-subtitle")]/text()
|//div[@class="record-details-column"]//div[contains(@class, "massage-")]//text()
|//div[@class="film-header"]/div[@class="intro"]//text()
concat: "\n\n"
Date:
selector: //span[@class="date"]/text()
postProcess:
- parseDate: January 2, 2006
Image: //meta[@property="og:image"]/@content
Studio:
Name:
fixed: Hegre
Tags:
Name: //div[@class="approved-tags"]/a
Performers:
Name: $actor/@title
URL:
selector: $actor//a/@href
postProcess:
- replace:
- regex: ^
with: https://hegre.com
galleryScraper:
common:
$actor: //a[@class="record-model"]
gallery:
Title: //div[@class="record-toolbar clearfix"]//h1
Date:
selector: //span[@class="date"]/text()
postProcess:
- parseDate: January 2, 2006
Studio:
Name:
fixed: Hegre
Tags:
Name: //div[@class="approved-tags"]/a
Performers:
Name: $actor/@title
URL:
selector: $actor//a/@href
postProcess:
- replace:
- regex: ^
with: https://hegre.com
performerSearch:
performer:
Name: //div[@class="search-results"][not(preceding::h2[@class="no-results"])]//a/@title
URL:
selector: //div[@class="search-results"][not(preceding::h2[@class="no-results"])]//div[@id="search-results-wrapper"]//a/@href
postProcess:
- replace:
- regex: ^
with: https://www.hegre.com
performerScraper:
# Not much, but better than nothing, since most of the performers just have forenames
performer:
Name: //span[@class="label"][contains(text(),"Name")]/following-sibling::span/text()
Gender:
fixed: Female
Birthdate:
selector: //span[@class="label"][contains(text(),"Born")]/following-sibling::span
postProcess:
- replace:
- regex: ^
with: "01 "
- parseDate: 02 January, 2006
Country:
selector: //span[@class="label"][contains(text(),"Country")]/following-sibling::span/text()
Height:
selector: //span[@class="label"][contains(text(),"Height")]/following-sibling::span/text()
postProcess:
- replace:
- regex: cm
with: ""
Image:
selector: //div[@class="details"]//img/@src
# Newer models allow for 1440x
postProcess:
- replace:
- regex: \d{3}x
with: 640x
Details:
selector: //div[@class="description translated-text"]//child::text()
concat: "\n\n"
# Last Updated November 03, 2022