This commit is contained in:
Christoph Califice
2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions

View File

@@ -0,0 +1,94 @@
name: IWantClips
performerByName:
action: scrapeXPath
queryURL: https://iwantclips.com/artists?query={}
scraper: performerSearch
performerByURL:
- action: scrapeXPath
url:
- iwantclips.com/store
scraper: performerScraper
sceneByURL:
- action: scrapeXPath
url:
- iwantclips.com/store
scraper: sceneScraper
sceneByName:
action: scrapeXPath
queryURL: https://iwantclips.com/search/advsearch?query={}
scraper: sceneSearch
sceneByQueryFragment:
action: scrapeXPath
queryURL: "{url}"
scraper: sceneScraper
xPathScrapers:
sceneSearch:
scene:
Title: //span[@class='clip-title']/a/text()
URL: //span[@class='clip-title']/a/@href
Image: //div[@class='clip-thumb-16-9']/a/img/@src
sceneScraper:
common:
$model: //a[@class="modelLink"]
scene:
Title: //h1[@class="headline hidden-xs"]/text()
URL: //link[@rel="canonical"]/@href
Date:
selector: //div[@class="col-xs-12 date fix"]/span/em/text()
postProcess:
- replace:
- regex: Published\s(.+)
with: $1
- parseDate: Jan 2, 2006
Details:
selector: //div[@class="col-xs-12 description fix"][last()]/span/text()
concat: "\n\n"
Tags:
Name:
selector: //div[@class="col-xs-12 hashtags hashtags-grey fix"]/span/em | //div[@class="col-xs-12 category fix"]/a
concat: ","
postProcess:
- replace:
- regex: 'Keywords:'
with: $1
- replace:
- regex: ',\s+'
with: ","
split: ","
Image:
selector: //div[contains(@class,'vidStuff')]//video[contains(@id,'html5_api')]/@poster | //div[contains(@class,'vidStuff')]//img/@src
postProcess:
- replace:
- regex: (\d*_.*((\.gif)|(\.mp4)))
with: t_$1
- replace:
- regex: '(\.gif)|(\.mp4)'
with: ".jpg"
Studio:
Name: $model
Performers:
Name: $model
performerScraper:
common:
$author: //li[@class="ais-Hits-item"][1]//span[contains(@class,'clip-author')]
performer:
Name: $author
URL: $author/a/@href
Details:
selector: $author/a/@href
postProcess:
- replace:
- regex: /store/
with: "/home/aboutMe/"
- subScraper: //p
concat: "\n\n"
Image: //div[@class="sideBar"]//img[contains(@class,'avatar')]/@src
performerSearch:
performer:
Name: //h5[contains(@class,'featured-store-name')]/a/text()
URL:
selector: //h5[contains(@class,'featured-store-name')]/a/@href
driver:
useCDP: true
# Last Updated December 14, 2023