# Scraper definition for xhamster.com.
name: "xhamster"
# URL-based scene lookup: each list entry pairs a set of URL patterns with the
# scraper that handles them. Here, xhamster.com URLs go to `sceneScraper`.
sceneByURL:
  - action: scrapeXPath
    url:
      - xhamster.com
    scraper: sceneScraper

# Fragment-based scene lookup: builds a video page URL from the `{filename}`
# placeholder and parses that page with `sceneScraper`.
sceneByFragment:
  action: scrapeXPath
  scraper: sceneScraper
  queryURL: https://xhamster.com/videos/{filename}
  # Rewrite rules applied to the `{filename}` placeholder before it is
  # substituted into queryURL.
  # NOTE(review): rules are presumably applied in the order listed — confirm.
  queryURLReplace:
    filename:
      # expects an id in square brackets before extension, as saved by yt-dlp by default
      - regex: '.*\[([0-9a-zA-Z]{4,})\]\.[^\.]+'
        with: $1
      # or expects an id starting with -xh
      - regex: '(?i)^.+-(xh[^.]+)\..+$'
        with: $1
      # if no id is found in the filename
      - regex: .*\.[^\.]+$
        with: # clear the filename so that it doesn't leak

# Name-based search: requests the site's search page and parses the result
# list with `sceneSearch`.
# NOTE(review): `{}` is presumably replaced with the search text — confirm.
sceneByName:
  action: scrapeXPath
  queryURL: https://xhamster.com/search/{}
  scraper: sceneSearch
# Scrapes the page addressed by the `{url}` placeholder with `sceneScraper`.
# NOTE(review): `{url}` is presumably the URL of a result chosen from
# `sceneByName` — confirm.
sceneByQueryFragment:
  action: scrapeXPath
  queryURL: "{url}"
  scraper: sceneScraper

# XPath scraper definitions referenced by the `scraper:` keys above.
# Entries under `common` define `$name` fragments that are reused as selector
# prefixes within the same scraper.
xPathScrapers:
  # Parses a search-result page: one title, URL and thumbnail per video thumb.
  sceneSearch:
    common:
      $videoThumbImage: //div[contains(@class, "thumb-list__item video-thumb")]//img[contains(@class, "thumb-image-container__image")]
      $videoThumbInfo: //div[contains(@class, "thumb-list__item video-thumb")]//a[contains(@class, "video-thumb-info__name")]
    scene:
      Title: $videoThumbInfo/@title
      URL:
        selector: $videoThumbInfo/@href
      Image:
        selector: $videoThumbImage/@src

  # Parses a single video page.
  sceneScraper:
    common:
      $player: //div[@class="width-wrap with-player-container"]
    scene:
      Title: $player/h1
      Details: //div[@class="ab-info controls-info__item xh-helper-hidden"]/p
      # Either the Open Graph or the Twitter URL meta tag.
      URL: //meta[@property='og:url']/@content|//meta[@name='twitter:url']/@content
      Date:
        selector: //div[@class="entity-info-container__date tooltip-nocache"]/@data-tooltip
        postProcess:
          # Keep only the leading YYYY-MM-DD part of the tooltip text.
          - replace:
              - regex: (\d{4}-\d{2}-\d{2})\s.+
                with: $1
          # NOTE(review): `2006-01-02` looks like a Go reference-time layout
          # for YYYY-MM-DD — confirm.
          - parseDate: 2006-01-02
      Image: //link[@as="image"]/@href
      # Studio, tags and performers all come from the player's nav links and
      # are told apart by the path segment in each link's href.
      Studio:
        Name:
          selector: $player/nav/ul/li/a[contains(@href,"/channels/")]
      Tags:
        Name:
          selector: $player/nav/ul/li/a[contains(@href,"/categories/") or contains(@href,"/tags/")]
      Performers:
        Name:
          selector: $player/nav/ul/li/a[contains(@href,"/pornstars/")]
# Last Updated February 17, 2024