153 lines
4.3 KiB
YAML
153 lines
4.3 KiB
YAML
name: Hegre

# Scene pages: URLs containing any of the fragments below are handled by the
# `sceneScraper` XPath scraper.
sceneByURL:
  - action: scrapeXPath
    scraper: sceneScraper
    url:
      - hegre.com/movies
      - hegre.com/films
      - hegre.com/massage
      - hegre.com/sexed

# Scene lookup by name: fetches the site's film search page and parses the
# results with `sceneSearch`. `{}` is the query placeholder in queryURL
# (Stash scraper convention).
sceneByName:
  action: scrapeXPath
  scraper: sceneSearch
  queryURL: 'https://www.hegre.com/?types=Film&q={}'

# Scene lookup from a fragment: the fragment's own URL is fetched as-is and
# parsed with `sceneScraper`.
sceneByQueryFragment:
  action: scrapeXPath
  scraper: sceneScraper
  queryURL: '{url}'

# Gallery pages: any hegre.com URL is handled by `galleryScraper`.
galleryByURL:
  - action: scrapeXPath
    scraper: galleryScraper
    url:
      - hegre.com
# Performer pages: any hegre.com URL is handled by `performerScraper`.
performerByURL:
  - action: scrapeXPath
    scraper: performerScraper
    url:
      - hegre.com
# Performer lookup by name: fetches the site's model search page and parses
# the results with `performerSearch`. `{}` is the query placeholder in
# queryURL (Stash scraper convention).
performerByName:
  action: scrapeXPath
  scraper: performerSearch
  queryURL: 'https://www.hegre.com/?q={}&types=Model'

# XPath scraper definitions referenced by the `scraper:` keys of the actions
# defined in this file.
xPathScrapers:
  # Parses the film search-results page (used by sceneByName).
  sceneSearch:
    common:
      # One anchor per search hit; every field below is read relative to it.
      $card: //a[@class='open-in-content-overlay']
    scene:
      Title:
        selector: $card/@title
      Date:
        selector: $card//div[@class='details']/span[last()]/text()
        postProcess:
          - replace:
              # Drop an ordinal suffix only when it directly follows a digit
              # ("1st" -> "1"), so letters inside a month name are never
              # touched.
              - regex: '(\d)(st|nd|rd|th)'
                with: "$1"
          - parseDate: Jan 2, 2006
      URL:
        selector: $card/@href
        postProcess:
          # Prefix the scraped href with the site origin.
          - replace:
              - regex: ^
                with: "https://www.hegre.com"
      Image:
        selector: $card//div[@class='img-holder']//img/@data-src

  # Parses a single scene page (used by sceneByURL and sceneByQueryFragment).
  sceneScraper:
    common:
      # Anchor element(s) linking to the scene's model(s).
      $actor: //a[@class="record-model"]
    scene:
      Title: //meta[@property="og:title"]/@content
      URL: //link[@hreflang="x-default"]/@href
      Details:
        # Union of three possible description locations; matched text nodes
        # are joined with a blank line between them.
        selector: |
          //div[contains(@class,"film-subtitle")]/text()
          |//div[@class="record-details-column"]//div[contains(@class, "massage-")]//text()
          |//div[@class="film-header"]/div[@class="intro"]//text()
        concat: "\n\n"
      Date:
        selector: //span[@class="date"]/text()
        postProcess:
          - parseDate: January 2, 2006
      Image: //meta[@property="og:image"]/@content
      Studio:
        Name:
          fixed: Hegre
      Tags:
        Name: //div[@class="approved-tags"]/a
      Performers:
        Name: $actor/@title
        URL:
          # $actor is itself the <a> element, so the href is read from it
          # directly (an <a> cannot contain a nested <a> in parsed HTML).
          selector: $actor/@href
          postProcess:
            # Same origin as every other URL emitted by this file.
            - replace:
                - regex: ^
                  with: https://www.hegre.com

  # Parses a single gallery page (used by galleryByURL).
  galleryScraper:
    common:
      # Anchor element(s) linking to the gallery's model(s).
      $actor: //a[@class="record-model"]
    gallery:
      Title: //div[@class="record-toolbar clearfix"]//h1
      Date:
        selector: //span[@class="date"]/text()
        postProcess:
          - parseDate: January 2, 2006
      Studio:
        Name:
          fixed: Hegre
      Tags:
        Name: //div[@class="approved-tags"]/a
      Performers:
        Name: $actor/@title
        URL:
          # $actor is itself the <a> element, so the href is read from it
          # directly (an <a> cannot contain a nested <a> in parsed HTML).
          selector: $actor/@href
          postProcess:
            # Same origin as every other URL emitted by this file.
            - replace:
                - regex: ^
                  with: https://www.hegre.com

  # Parses the model search-results page (used by performerByName).
  performerSearch:
    common:
      # Results container, ignored when a "no-results" heading precedes it.
      $results: //div[@class="search-results"][not(preceding::h2[@class="no-results"])]
    performer:
      Name: $results//a/@title
      URL:
        selector: $results//div[@id="search-results-wrapper"]//a/@href
        postProcess:
          # Prefix the scraped href with the site origin.
          - replace:
              - regex: ^
                with: https://www.hegre.com

  # Parses a single model page (used by performerByURL).
  performerScraper:
    # Not much, but better than nothing, since most of the performers just have forenames
    performer:
      Name: //span[@class="label"][contains(text(),"Name")]/following-sibling::span/text()
      # NOTE(review): gender is hard-coded, not read from the page.
      Gender:
        fixed: Female
      Birthdate:
        selector: //span[@class="label"][contains(text(),"Born")]/following-sibling::span
        postProcess:
          # Prepend a day-of-month ("01 ") so the value fits the
          # "02 January, 2006" layout expected by parseDate below.
          - replace:
              - regex: ^
                with: "01 "
          - parseDate: 02 January, 2006
      Country:
        selector: //span[@class="label"][contains(text(),"Country")]/following-sibling::span/text()
      Height:
        selector: //span[@class="label"][contains(text(),"Height")]/following-sibling::span/text()
        postProcess:
          # Remove the "cm" unit from the scraped value.
          - replace:
              - regex: cm
                with: ""
      Image:
        selector: //div[@class="details"]//img/@src
        # Newer models allow for 1440x
        postProcess:
          # Rewrite a three-digit "NNNx" size token in the image URL to 640x.
          - replace:
              - regex: \d{3}x
                with: 640x
      Details:
        selector: //div[@class="description translated-text"]//child::text()
        concat: "\n\n"

# Last Updated November 03, 2022