# XPath scraper definition for clips4sale.com.
name: Clips4Sale

# Scrape a scene from a clip page whose URL contains the studio path below.
sceneByURL:
  - action: scrapeXPath
    url:
      - clips4sale.com/studio/
    scraper: c4sSceneScraper

# Search for scenes by name; the `{}` placeholder sits in the search-term
# position of the site's search URL.
sceneByName:
  action: scrapeXPath
  queryURL: https://www.clips4sale.com/clips/search/{}/category/0/storesPage/1/clipsPage/1
  scraper: c4sSearch

# Scrape the full scene from the URL carried by the query fragment.
sceneByQueryFragment:
  action: scrapeXPath
  queryURL: "{url}"
  scraper: c4sSceneScraper

xPathScrapers:
  # Search-results scraper: one result per $card element.
  c4sSearch:
    common:
      $card: //figure[contains(@class, "splide__slide")]
    scene:
      Title:
        selector: $card/@aria-label
        postProcess:
          # Remove <em> / </em> markup from the label.
          - replace:
              - regex: <\/?em>
                with: ""
      URL:
        selector: $card//a[contains(@class, "search-clip__title")]/@href
        postProcess:
          # Prefix the href with the site origin.
          - replace:
              - regex: ^
                with: https://www.clips4sale.com
      Image:
        selector: $card//img/@src

  # Scene-page scraper.
  c4sSceneScraper:
    common:
      $scene: //figure[contains(@class, "mediabook-preview")]
      $studio: //a[@class="hover:underline text-white font-medium w-full truncate block"]
    scene:
      Title:
        selector: $scene/figcaption//h1
        postProcess: &ppStripTitle
          - replace:
              # Remove format/resolution markers (MP4, WMV, 1080p, HD, ...).
              # https://regex101.com/r/CYLeuO/8
              - regex: >-
                  (?i)[ \t]*((Super )?[SH]D)?[ ,-]*(\b(MP4|OPTIMUM|WMV|MOV|AVI|UHD|[48]K)\b|1080p|720p|480p|(Standard|High) Def(inition)?)+[ \t]*
                with: ""
              # Remove bracket pairs left empty by the previous step, and
              # trailing spaces/dashes.
              # https://regex101.com/r/C3TgFG/2
              - regex: >-
                  [ \t]*[([][])][ \t]*|[ -]+$
                with: ""
      Details:
        selector: $scene//div[contains(@class, "read-more--text")]//text()
        # Join the individual text nodes with a blank line between them.
        concat: "\n\n"
      Studio:
        Name: $studio/text()
        URL:
          selector: $studio/@href
          postProcess: &ppPrependOrigin
            # Prefix the href with the site origin.
            - replace:
                - regex: ^
                  with: https://www.clips4sale.com
      Date:
        selector: //div[contains(@class, 'border-b border-white/20 lg:border-0 pb-3 lg:pb-0 mb-3 lg:mb-0')]/span[contains(text(),'/')]/text()
        postProcess: &ppParseDate
          # Drop everything from the first whitespace onward, then parse the
          # remaining date token.
          - replace:
              - regex: \s.+
                with: ""
          - parseDate: 1/2/06
      Tags:
        Name:
          selector: //span[contains(text(), 'Keywords')]/following-sibling::span/a/text() | //span[contains(text(), 'Category')]/following-sibling::span/a/text()
          # Join all matches, collapse ", " to ",", then split into
          # individual tag names.
          concat: ","
          postProcess:
            - replace:
                - regex: ',\s+'
                  with: ","
          split: ","
      Performers:
        # Clips4Sale doesn't have an explicit performer field, but performers
        # are often included in the video tags. So we attempt to find matches
        # there. The studio name is included as a candidate as well.
        Name:
          selector: //span[contains(text(), 'Keywords')]/following-sibling::span/a/text() | //span[contains(text(), 'Category')]/following-sibling::span/a/text() | $studio/text()
          concat: ","
          postProcess:
            - replace:
                - regex: ',\s+'
                  with: ","
          split: ","
      Image:
        selector: $scene//img/@src
      URL: //meta[@property="og:url"]/@content

driver:
  cookies:
    # Send the terms-agreement cookie with requests (presumably so the site
    # serves the page instead of a consent prompt - confirm).
    - CookieURL: "https://clips4sale.com"
      Cookies:
        - Name: "iAgreeWithTerms"
          Domain: ".clips4sale.com"
          Value: "true"
          Path: "/"
  headers:
    - Key: User-Agent
      Value: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0
# Last Updated November 07, 2023