# Original listing metadata: 103 lines, 3.4 KiB, YAML
# Name of this scraper definition.
name: JavLibrary
# Fragment lookup: builds a search-by-ID URL from {filename} after the
# filename has been rewritten by the queryURLReplace rules below, then hands
# the fetched page to sceneScraper.
sceneByFragment:
  action: scrapeXPath
  queryURL: "https://www.javlibrary.com/en/vl_searchbyid.php?keyword={filename}"
  queryURLReplace:
    filename:
      # Drop any "-JG" + digit marker.
      - regex: '-JG\d'
        with: ""
      # Keep only capture group 2: a run of letters/hyphens followed by digits.
      # The trailing (.+) requires at least one more character after it.
      - regex: '(.*[^a-zA-Z0-9])*([a-zA-Z-]+\d+)(.+)'
        with: "$2"
  # Note: If javlibrary doesn't work, you can replace javlibrary with any of
  # its mirrors in the queryURL.
  # The scraper matches the DVD version of a title. If you want the Blu-ray
  # version, you'll need to add the correct URL manually and scrape using that
  # afterwards.
  scraper: sceneScraper
# Scene lookup by URL: applies sceneScraper to URLs containing one of the
# listed domains (the non-javlibrary entries are presumably mirrors).
sceneByURL:
  - action: scrapeXPath
    url:
      - javlibrary.com/
      - p54u.com/
      - d52q.com/
      - n53i.com/
    scraper: sceneScraper
# Scene lookup by name: the search text is substituted for {} in the
# search-by-ID URL and the resulting page is parsed with sceneScraper.
sceneByName:
  action: scrapeXPath
  queryURL: "https://www.javlibrary.com/en/vl_searchbyid.php?keyword={}"
  scraper: sceneScraper
# Scene lookup from a query fragment: fetches the fragment's {url} as-is and
# parses it with sceneScraper. The quotes are required; a bare {url} would be
# read as a YAML flow mapping.
sceneByQueryFragment:
  action: scrapeXPath
  queryURL: "{url}"
  scraper: sceneScraper
# Movie lookup by URL: same domain list as sceneByURL, parsed with
# movieScraper instead of sceneScraper.
movieByURL:
  - action: scrapeXPath
    url:
      - javlibrary.com/
      - p54u.com/
      - d52q.com/
      - n53i.com/
    scraper: movieScraper
# XPath field mappings referenced by the `scraper:` keys of the actions in
# this file.
xPathScrapers:
  # Referenced by sceneByFragment, sceneByURL, sceneByName and
  # sceneByQueryFragment. Several selectors are unions ("|") of a
  # "videos" list expression and a "video_*" detail expression.
  sceneScraper:
    scene:
      # Title attribute of list entries not containing "(Blu-ray", or the
      # text of the "video_id" cell.
      Title:
        selector: //div[@class="videos"]/div/a/@title[not(contains(.,"(Blu-ray"))]|//div[@id="video_id"]/table/tbody/tr/td[@class="text"]/text()
      # Using URL to still get a URL with sceneByFragment if a duplicate exists.
      URL:
        selector: //div[@class="videos"]/div/a/@title[not(contains(.,"(Blu-ray"))]/../@href|//meta[@property="og:url"]/@content
        postProcess:
          - replace:
              # Keep only what follows the last "?" (the query string).
              - regex: '(.*\?)(.*)'
                with: "$2"
              # Prefix the fixed base URL. The result always points at
              # www.javlibrary.com, whichever domain was scraped.
              - regex: '^'
                with: "https://www.javlibrary.com/en/?"
      Date:
        selector: //div[@id="video_date"]/table/tbody/tr/td[@class="text"]/text()
      Details:
        selector: //div[@id="video_title"]/h3/a/text()
        postProcess:
          - replace:
              # Remove the leading text up to and including the first space;
              # the replacement is intentionally left empty.
              - regex: '^(.*? ){1}'
                with:
      Tags:
        Name: //div[@id="video_genres"]/table/tbody/tr/td[@class="text"]/span/a
      Performers:
        Name: //div[@id="video_cast"]/table/tbody/tr/td[@class="text"]/span/span/a
      # Image of a non-Blu-ray list entry, or the "video_jacket" image.
      Image:
        selector: //div[@class="videos"]/div/a[not(contains(@title,"(Blu-ray"))]//img/@src|//div[@id="video_jacket"]/img/@src
        postProcess:
          - replace:
              # Normalise the scheme: strip any "http:" / "https:" (the
              # replacement is intentionally left empty) ...
              - regex: '(http:|https:)'
                with:
              # ... then prepend "https:".
              - regex: '^'
                with: "https:"
      Studio:
        Name: //div[@id="video_maker"]/table/tbody/tr/td[@class="text"]/span/a/text()
  # Referenced by movieByURL; uses only the "video_*" detail expressions.
  movieScraper:
    movie:
      Name: //div[@id="video_id"]/table/tbody/tr/td[@class="text"]/text()
      Director: //div[@id='video_director']/table/tbody/tr/td[@class="text"]/span/a/text()
      Duration:
        selector: //div[@id="video_length"]/table/tbody/tr/td/span[@class="text"]/text()
        postProcess:
          - replace:
              # Append ":00" to the scraped value (presumably a whole number
              # of minutes, giving minutes:seconds - TODO confirm).
              - regex: '$'
                with: ":00"
      Date: //div[@id="video_date"]/table/tbody/tr/td[@class="text"]/text()
      Synopsis:
        selector: //div[@id="video_title"]/h3/a/text()
        postProcess:
          - replace:
              # Remove the leading text up to and including the first space;
              # the replacement is intentionally left empty.
              - regex: '^(.*? ){1}'
                with:
      Studio:
        Name: //div[@id="video_maker"]/table/tbody/tr/td[@class="text"]/span/a/text()
      FrontImage:
        selector: //div[@id="video_jacket"]/img/@src
        postProcess:
          - replace:
              # Normalise the scheme: strip any "http:" / "https:" (the
              # replacement is intentionally left empty) ...
              - regex: '(http:|https:)'
                with:
              # ... then prepend "https:".
              - regex: '^'
                with: "https:"

# Last Updated September 14, 2021