109 lines
3.4 KiB
YAML
109 lines
3.4 KiB
YAML
# Name of this scraper definition; the other top-level keys in this file
# configure how scenes are looked up and parsed on clips4sale.com.
name: Clips4Sale
|
|
|
|
# Scene lookup by URL: URLs matching the pattern listed under `url` are
# parsed with the c4sSceneScraper XPath definition.
sceneByURL:
  - scraper: c4sSceneScraper
    action: scrapeXPath
    url:
      - "clips4sale.com/studio/"
|
|
|
|
# Scene search by name: the results page at queryURL is parsed with the
# c4sSearch XPath definition.
# NOTE(review): `{}` is presumably the placeholder for the search term;
# confirm against the consumer's scraper documentation.
sceneByName:
  scraper: c4sSearch
  action: scrapeXPath
  queryURL: 'https://www.clips4sale.com/clips/search/{}/category/0/storesPage/1/clipsPage/1'
|
|
# Scene lookup from a query fragment: the fragment's `url` field is used
# verbatim as the page to fetch, then parsed with c4sSceneScraper.
sceneByQueryFragment:
  scraper: c4sSceneScraper
  action: scrapeXPath
  queryURL: '{url}'
|
|
|
|
# XPath scraper definitions referenced by the `scraper:` keys of the
# scene* sections of this file.
xPathScrapers:
  # Parses the search-results page: one result per matching <figure> card.
  c4sSearch:
    common:
      $card: //figure[contains(@class, "splide__slide")]
    scene:
      Title:
        selector: $card/@aria-label
        postProcess:
          # Strip the <em>...</em> tags that appear in the label text.
          - replace:
              - regex: <\/?em>
                with: ""
      URL:
        selector: $card//a[contains(@class, "search-clip__title")]/@href
        postProcess:
          # Prepend the site origin to the scraped href.
          - replace:
              - regex: ^
                with: https://www.clips4sale.com
      Image:
        selector: $card//img/@src

  # Parses a single clip page.
  c4sSceneScraper:
    common:
      $scene: //figure[contains(@class, "mediabook-preview")]
      # Matches the studio link by its exact class attribute value.
      $studio: //a[@class="hover:underline text-white font-medium w-full truncate block"]
    scene:
      Title:
        selector: $scene/figcaption//h1
        postProcess: &ppStripTitle
          - replace:
              # Remove format/resolution markers (MP4, WMV, HD, 1080p, 4K,
              # "High Definition", ...) from the title.
              # https://regex101.com/r/CYLeuO/8
              - regex: >-
                  (?i)[ \t]*((Super )?[SH]D)?[ ,-]*(\b(MP4|OPTIMUM|WMV|MOV|AVI|UHD|[48]K)\b|1080p|720p|480p|(Standard|High) Def(inition)?)+[ \t]*
                # Explicit empty string (a bare `with:` would parse as null).
                with: ""
              # Remove bracket pairs left empty by the rule above, plus
              # trailing spaces and dashes.
              # https://regex101.com/r/C3TgFG/2
              - regex: >-
                  [ \t]*[([][])][ \t]*|[ -]+$
                with: ""
      Details:
        selector: $scene//div[contains(@class, "read-more--text")]//text()
        # Join the individual text nodes with a blank line between them.
        concat: "\n\n"
      Studio:
        Name: $studio/text()
        URL:
          selector: $studio/@href
          postProcess: &ppPrependOrigin
            # Prepend the site origin to the scraped href.
            - replace:
                - regex: ^
                  with: https://www.clips4sale.com
      Date:
        selector: //div[contains(@class, 'border-b border-white/20 lg:border-0 pb-3 lg:pb-0 mb-3 lg:mb-0')]/span[contains(text(),'/')]/text()
        postProcess: &ppParseDate
          # Drop everything from the first whitespace onwards, then parse
          # the remainder using the 1/2/06 layout.
          - replace:
              - regex: \s.+
                with: ""
          - parseDate: 1/2/06
      Tags:
        Name:
          # Union of the links listed after the "Keywords" and "Category"
          # labels.
          selector: //span[contains(text(), 'Keywords')]/following-sibling::span/a/text() | //span[contains(text(), 'Category')]/following-sibling::span/a/text()
          concat: ","
          postProcess:
            # Remove whitespace following each comma before splitting.
            - replace:
                - regex: ',\s+'
                  with: ","
          split: ","
      Performers:
        # Clips4Sale doesn't have an explicit performer field, but performers
        # are often included in the video tags. So we attempt to find matches
        # there (the studio name is included as a candidate as well).
        Name:
          selector: //span[contains(text(), 'Keywords')]/following-sibling::span/a/text() | //span[contains(text(), 'Category')]/following-sibling::span/a/text() | $studio/text()
          concat: ","
          postProcess:
            # Remove whitespace following each comma before splitting.
            - replace:
                - regex: ',\s+'
                  with: ","
          split: ","
      Image:
        selector: $scene//img/@src
      URL: //meta[@property="og:url"]/@content
|
|
|
|
# HTTP settings applied when fetching pages for this scraper.
driver:
  cookies:
    - CookieURL: "https://clips4sale.com"
      Cookies:
        # NOTE(review): presumably pre-accepts the site's terms prompt so
        # that the real page content is served; confirm against the site.
        - Name: "iAgreeWithTerms"
          Domain: ".clips4sale.com"
          Value: "true"
          Path: "/"
  headers:
    # Desktop Firefox User-Agent. The original value ended with a stray,
    # unbalanced ")" which has been removed.
    - Key: User-Agent
      Value: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0
|
|
# Last Updated November 07, 2023
|