125 lines
4.2 KiB
YAML
125 lines
4.2 KiB
YAML
# Scraper name.
name: DLsite

# Scene lookup by fragment. The scene title is expected to be the RJ code;
# it is substituted for {title} in the product-page URL below.
sceneByFragment:
  action: scrapeXPath
  scraper: sceneScraper
  queryURL: "https://www.dlsite.com/maniax/work/=/product_id/{title}.html"
  # Disabled alternative that requests the en_US locale:
  #queryURL: https://www.dlsite.com/maniax/work/=/product_id/{title}.html/?locale=en_US
# Scene lookup by URL. Applies to URLs containing dlsite.com/maniax/work/;
# the matched URL itself is the page that gets scraped.
sceneByURL:
  - action: scrapeXPath
    scraper: sceneScraper
    queryURL: '{url}'
    url:
      - dlsite.com/maniax/work/
# Scene search by name. The {} placeholder sits in the keyword segment of the
# search URL; results are read with sceneQueryScraper.
sceneByName:
  action: scrapeXPath
  scraper: sceneQueryScraper
  queryURL: "https://www.dlsite.com/maniax/fsr/=/language/jp/sex_category%5B0%5D/male/keyword/{}/order%5B0%5D/trend/per_page/30/from/fs.header"
# Scrapes the URL carried by a query fragment using the regular scene scraper.
sceneByQueryFragment:
  action: scrapeXPath
  scraper: sceneScraper
  queryURL: '{url}'
  # Disabled: would append the en_US locale suffix to the end of the URL.
  #queryURLReplace:
  #  url:
  #    - regex: $
  #      with: "/?locale=en_US"
# Gallery lookup by fragment. {title} is substituted into the maniax
# product-page URL; the page is read with galleryManiaxScraper.
galleryByFragment:
  action: scrapeXPath
  scraper: galleryManiaxScraper
  queryURL: "https://www.dlsite.com/maniax/work/=/product_id/{title}.html"
  # Disabled alternative that requests the en_US locale:
  #queryURL: https://www.dlsite.com/maniax/work/=/product_id/{title}.html/?locale=en_US
# Gallery lookup by URL. Two URL families are handled, each with its own
# scraper: maniax work pages and books work pages.
galleryByURL:
  # dlsite.com/maniax/work/ -> galleryManiaxScraper
  - action: scrapeXPath
    scraper: galleryManiaxScraper
    queryURL: '{url}'
    url:
      - dlsite.com/maniax/work/
  # dlsite.com/books/work/ -> galleryBookScraper
  - action: scrapeXPath
    scraper: galleryBookScraper
    queryURL: '{url}'
    url:
      - dlsite.com/books/work/
xPathScrapers:
  # Used by sceneByName: reads title, link and thumbnail from the
  # search-results markup.
  sceneQueryScraper:
    scene:
      Title: "//dd[@class='work_name']/div[@class='multiline_truncate']/a/text()"
      URL: "//dd[@class='work_name']/div[@class='multiline_truncate']/a/@href"
      Image: "//a[@class='work_thumb_inner']/img/@src"

  # Product-page scraper for scenes. The anchors declared here (&titleSel,
  # &urlSel, &dateSel, &detailsSel, &tagsSel, &studioSel) are reused through
  # aliases by the two gallery scrapers further down, so this mapping must
  # stay ahead of them in the file.
  sceneScraper:
    scene:
      Title: &titleSel "//h1[@id='work_name']/text()"
      URL: &urlSel "//meta[@property='og:url']/@content"
      # Reads the cell next to the 販売日 / "Release date" header.
      Date: &dateSel
        selector: "//th[text() = '販売日']/following-sibling::td//text()|//th[text() = 'Release date']/following-sibling::td//text()"
        postProcess:
          - replace:
              # English month abbreviations -> two-digit month numbers.
              - {regex: Jan, with: '01'}
              - {regex: Feb, with: '02'}
              - {regex: Mar, with: '03'}
              - {regex: Apr, with: '04'}
              - {regex: May, with: '05'}
              - {regex: Jun, with: '06'}
              - {regex: Jul, with: '07'}
              - {regex: Aug, with: '08'}
              - {regex: Sep, with: '09'}
              - {regex: Oct, with: '10'}
              - {regex: Nov, with: '11'}
              - {regex: Dec, with: '12'}
              # Cut everything from the first whitespace to the end of the text.
              - {regex: '\s.+$', with: ''}
              # Rewrite NN/NN/NNNN into the 年/月/日 form parsed below
              # (third group becomes the year, first the month, second the day).
              - {regex: '(\d{2})/(\d{2})/(\d{4})', with: "${3}年${1}月${2}日"}
          - parseDate: '2006年01月02日'
      # All matching work_parts_area blocks, joined with a blank line between them.
      Details: &detailsSel
        selector: '//div[contains(@class,"work_parts_area")]'
        concat: "\n\n"
      Tags: &tagsSel
        Name: "//div[@class='main_genre']/a/text()"
      # Performers are limited to voice actors (声優 / Voice Actor) and
      # illustrators (イラスト / Illustration); writers and musicians are
      # intentionally left out.
      Performers:
        Name: "//th[text() = '声優']/following-sibling::td/a/text()|//th[text() = 'Voice Actor']/following-sibling::td/a/text()|//th[text() = 'イラスト']/following-sibling::td/a/text()|//th[text() = 'Illustration']/following-sibling::td/a/text()"
      Studio: &studioSel
        Name: "//span[@class='maker_name']/a/text()"
      # The srcset value of the active slider item, with "https:" prepended.
      Image:
        selector: "//li[@class='slider_item active']/picture/img/@srcset"
        postProcess:
          - replace:
              - {regex: '^', with: 'https:'}

  # Gallery scraper for maniax pages; shares the scene selectors via aliases.
  galleryManiaxScraper:
    gallery:
      Title: *titleSel
      URL: *urlSel
      Date: *dateSel
      Details: *detailsSel
      Tags: *tagsSel
      Studio: *studioSel
      # Illustrators only (イラスト / Illustration).
      Performers:
        Name: "//th[text() = 'イラスト']/following-sibling::td/a/text()|//th[text() = 'Illustration']/following-sibling::td/a/text()"

  # Gallery scraper for books pages; shares the scene selectors via aliases.
  galleryBookScraper:
    gallery:
      Title: *titleSel
      URL: *urlSel
      Date: *dateSel
      Details: *detailsSel
      Tags: *tagsSel
      Studio: *studioSel
      # On maniax pages the 著者 / Author row names the writer, but on books
      # pages it names the illustrators, so it is used for performers here.
      Performers:
        Name: "//th[text() = '著者']/following-sibling::td/a/text()|//th[text() = 'Author']/following-sibling::td/a/text()"
# Last updated: June 19, 2022