# File-viewer header of the original listing: YAML, 95 lines, 2.7 KiB.
# Display name of this scraper definition.
name: IWantClips
# Performer lookup by name. The result page fetched from queryURL is parsed by
# the performerSearch scraper.
# NOTE(review): "{}" is presumably the placeholder for the search term - confirm.
performerByName:
  scraper: performerSearch
  action: scrapeXPath
  queryURL: https://iwantclips.com/artists?query={}
# Performer scrape from a store URL; pages matching the url pattern below are
# parsed by the performerScraper scraper.
performerByURL:
  - scraper: performerScraper
    action: scrapeXPath
    url:
      - iwantclips.com/store
# Scene scrape from a clip URL; pages matching the url pattern below are parsed
# by the sceneScraper scraper. The pattern is the same one performerByURL uses.
sceneByURL:
  - scraper: sceneScraper
    action: scrapeXPath
    url:
      - iwantclips.com/store
# Scene lookup by name. The advanced-search result page fetched from queryURL
# is parsed by the sceneSearch scraper.
# NOTE(review): "{}" is presumably the placeholder for the search term - confirm.
sceneByName:
  scraper: sceneSearch
  action: scrapeXPath
  queryURL: https://iwantclips.com/search/advsearch?query={}
# Scene scrape from a query fragment: the fragment's url field is used as the
# page to fetch, and that page is parsed by the sceneScraper scraper.
# The value must stay quoted: an unquoted leading "{" would start a YAML flow mapping.
sceneByQueryFragment:
  scraper: sceneScraper
  action: scrapeXPath
  queryURL: "{url}"
# XPath scraper definitions. Each child key is the name referenced by the
# `scraper:` field of one of the entry points (sceneSearch, sceneScraper,
# performerScraper, performerSearch).
xPathScrapers:
  # Scene search results: title, link and thumbnail of each listed clip.
  sceneSearch:
    scene:
      Title: //span[@class='clip-title']/a/text()
      URL: //span[@class='clip-title']/a/@href
      Image: //div[@class='clip-thumb-16-9']/a/img/@src

  # Single clip page.
  sceneScraper:
    common:
      # Shared selector: the model link feeds both Studio and Performers below.
      $model: //a[@class="modelLink"]
    scene:
      Title: //h1[@class="headline hidden-xs"]/text()
      URL: //link[@rel="canonical"]/@href
      Date:
        selector: //div[@class="col-xs-12 date fix"]/span/em/text()
        postProcess:
          # Keep only the text after the "Published" prefix, then parse it
          # with the Go-style reference layout "Jan 2, 2006".
          - replace:
              - regex: Published\s(.+)
                with: $1
          - parseDate: Jan 2, 2006
      Details:
        # Only the last description container is used; its text nodes are
        # joined with a blank line between them.
        selector: //div[@class="col-xs-12 description fix"][last()]/span/text()
        concat: "\n\n"
      Tags:
        Name:
          # Hashtag entries and category links are merged into one
          # comma-separated string, cleaned up, then split back into tags.
          selector: //div[@class="col-xs-12 hashtags hashtags-grey fix"]/span/em | //div[@class="col-xs-12 category fix"]/a
          concat: ","
          postProcess:
            # Strip the literal "Keywords:" label. The pattern has no capture
            # group, so the replacement is spelled out as an empty string
            # instead of a dangling "$1" back-reference.
            - replace:
                - regex: 'Keywords:'
                  with: ""
            # Collapse ", " (comma plus whitespace) so the split yields clean names.
            - replace:
                - regex: ',\s+'
                  with: ","
          split: ","
      Image:
        # Either the video player's poster attribute or an image inside the
        # vidStuff container.
        selector: //div[contains(@class,'vidStuff')]//video[contains(@id,'html5_api')]/@poster | //div[contains(@class,'vidStuff')]//img/@src
        postProcess:
          # For .gif/.mp4 sources: prefix the "<digits>_..." file name with
          # "t_" and then swap the extension for ".jpg".
          - replace:
              - regex: (\d*_.*((\.gif)|(\.mp4)))
                with: t_$1
          - replace:
              - regex: '(\.gif)|(\.mp4)'
                with: ".jpg"
      Studio:
        Name: $model
      Performers:
        Name: $model

  # Performer data taken from a store page.
  performerScraper:
    common:
      # Author element of the first hit item on the page.
      $author: //li[@class="ais-Hits-item"][1]//span[contains(@class,'clip-author')]
    performer:
      Name: $author
      URL: $author/a/@href
      Details:
        # The store link is rewritten to the matching "about me" page, which
        # is then sub-scraped for its <p> elements.
        selector: $author/a/@href
        postProcess:
          - replace:
              - regex: /store/
                with: "/home/aboutMe/"
          - subScraper: //p
        # NOTE(review): this concat is at attribute level, next to the
        # subScraper. It may not join the sub-scraped //p results - confirm
        # whether it was meant to be nested under subScraper instead.
        concat: "\n\n"
      Image: //div[@class="sideBar"]//img[contains(@class,'avatar')]/@src

  # Performer search results: store name and store link of each hit.
  performerSearch:
    performer:
      Name: //h5[contains(@class,'featured-store-name')]/a/text()
      URL:
        selector: //h5[contains(@class,'featured-store-name')]/a/@href

# Driver options for this scraper.
# NOTE(review): useCDP presumably makes pages load through a Chrome DevTools
# Protocol browser so script-rendered content is available - confirm.
driver:
  useCDP: true

# Last Updated December 14, 2023