78 lines
2.2 KiB
YAML
78 lines
2.2 KiB
YAML
# Scraper configuration for rule34video.com, built on XPath selectors.
# `name` is the label this scraper is listed under.
name: "Rule34Video"
# Scrape a scene directly from its page URL. Both the plural (/videos/) and
# singular (/video/) path forms are listed; each is handled by the
# `sceneScraper` definition under `xPathScrapers`.
sceneByURL:
  - action: scrapeXPath
    url:
      - rule34video.com/videos/
      - rule34video.com/video/
    scraper: sceneScraper

# Search for scenes by name: the site's search page is fetched and parsed with
# the `sceneSearch` definition. `{}` in queryURL is the placeholder for the
# search text (see the Stash scraper documentation for substitution rules).
sceneByName:
  action: scrapeXPath
  queryURL: https://rule34video.com/search/{}
  scraper: sceneSearch
# Resolve a selected search result: the `{url}` placeholder is used verbatim as
# the page to fetch, and the page is parsed with `sceneScraper`.
# Quoted so the leading `{` is not read as a YAML flow mapping.
sceneByQueryFragment:
  action: scrapeXPath
  queryURL: "{url}"
  scraper: sceneScraper

# XPath scraper definitions referenced by the `scraper:` keys of the actions.
xPathScrapers:
  # Selectors for the search-results page. `$searchItem` is the shared prefix
  # reused by every field in `scene`.
  sceneSearch:
    common:
      $searchItem: //div[contains(@class,"item thumb video_")]
    scene:
      Title: $searchItem/a[@class="th js-open-popup"]/@title
      URL: $searchItem/a[@class="th js-open-popup"]/@href
      Image: $searchItem//img/@data-original

  # Selectors for a single video page.
  sceneScraper:
    common:
      # NOTE(review): `$article` is defined here but no selector below
      # references it; they spell out //div[@class="video_tools"] instead.
      $article: //div[@class="video_tools"]
    scene:
      URL: //link[@rel='canonical']/@href
      Title: //div[@class="heading"]//h1
      Details: //div[@class="video_tools"]//div[contains(text(),"Description:")]/em
      # Date and Image both read the JSON-LD <script> block and reduce its
      # text to a single captured value via a regex replace.
      Date:
        selector: //script[@type="application/ld+json"]
        postProcess:
          - replace:
              # Keep only the value of the "uploadDate" property.
              - regex: '.+uploadDate": "([^"]+)".+'
                with: $1
      Image:
        selector: //script[@type="application/ld+json"]
        postProcess:
          - replace:
              # Keep only the "thumbnailUrl" value (must start with http).
              - regex: '.+thumbnailUrl": "(http[^"]+)".+'
                with: $1
      Tags:
        Name:
          # Link texts following either the "Categories:" or "Tags:" label.
          selector: //div[@class="video_tools"]//div[text()="Categories:" or text()="Tags:"]/following-sibling::a//text()
          postProcess:
            - replace:
                # Blank out entries that start with "+" and end with "Suggest".
                - regex: '^\+.+Suggest$'
                  with: ""
      Studio:
        # The link following the "Artist:" label supplies both name and URL.
        Name: //div[@class="video_tools"]//div[text()="Artist:"]/following-sibling::a/span
        URL: //div[@class="video_tools"]//div[text()="Artist:"]/following-sibling::a/@href
# Debug options.
# NOTE(review): printHTML is normally a development aid; confirm it is meant
# to stay enabled in the published scraper.
debug:
  printHTML: true
# Request settings: custom headers and cookies for rule34video.com.
driver:
  headers:
    # Firefox 79 on Windows 10 user-agent string. The value must end at the
    # version number; a trailing ")" would make the string malformed.
    - Key: User-Agent
      Value: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0
  cookies:
    - CookieURL: "https://rule34video.com/"
      Cookies:
        # Values are quoted so they stay strings rather than integers.
        - Name: kt_rt_popAccess
          Value: "1"
          Domain: .rule34video.com
          Path: /
        - Name: kt_tcookie
          Value: "1"
          Domain: .rule34video.com
          Path: /
        # Presumably a randomly generated 43-character value rather than a
        # fixed one -- confirm against the Stash scraper documentation.
        - Name: cf_clearance
          ValueRandom: 43
          Domain: .rule34video.com
          Path: /
# Last Updated September 29, 2023
|