# YAML · 130 lines · 3.8 KiB
# Scraper definition for watch4beauty.com; the sections below query the
# site's JSON API (action: scrapeJson) for scenes and galleries.
name: Watch4Beauty
|
|
# Scene lookup by URL.
# Each entry lists the site URLs it handles, the API query template, the
# rewrite applied to the URL, and the name of the scraper that reads the
# JSON response. The anchors (&storiesURL, &queryURL, &storiesQueryURL,
# &updatesURL, &updatesQueryURL) are aliased again by galleryByURL.
sceneByURL:
  # "Stories" pages: ".../stories/<x>" is rewritten to "magazines/<x>".
  - action: scrapeJson
    url: &storiesURL
      - "https://www.watch4beauty.com/stories/"
    # NOTE(review): presumably {url} receives the URL after queryURLReplace
    # has been applied - confirm against the scraper documentation.
    queryURL: &queryURL "https://www.watch4beauty.com/api/{url}"
    scraper: sceneStoriesScraper
    queryURLReplace: &storiesQueryURL
      url:
        - regex: '.+/stories/'
          with: 'magazines/'

  # "Updates" pages: ".../updates/<x>" is rewritten to "issues/<x>".
  - action: scrapeJson
    url: &updatesURL
      - "https://www.watch4beauty.com/updates/"
    queryURL: *queryURL
    scraper: sceneUpdatesScraper
    queryURLReplace: &updatesQueryURL
      url:
        - regex: '.+/updates/'
          with: 'issues/'
|
|
# Gallery lookup by URL.
# Reuses, via YAML aliases, the URL lists, query template and URL rewrites
# anchored in sceneByURL; only the target scraper differs per entry.
galleryByURL:
  # Stories galleries.
  - action: scrapeJson
    url: *storiesURL
    queryURL: *queryURL
    scraper: galleryStoriesScraper
    queryURLReplace: *storiesQueryURL

  # Updates galleries.
  - action: scrapeJson
    url: *updatesURL
    queryURL: *queryURL
    scraper: galleryUpdatesScraper
    queryURLReplace: *updatesQueryURL
|
|
|
|
# JSON scrapers referenced by the sceneByURL / galleryByURL entries.
# All selectors start with "0.", i.e. they read from the first element of
# the API response. Field definitions are anchored in the scene scrapers
# and aliased by the gallery scrapers so both stay in sync.
jsonScrapers:
  # ---- Stories (magazine_* fields) -------------------------------------
  sceneStoriesScraper:
    scene:
      Title: &storiesTitle "0.magazine_title"
      Details: &storiesDetails "0.magazine_text"
      Date: &storiesDate
        selector: "0.magazine_datetime"
        postProcess:
          # Drop everything from the first "T" onward.
          # NOTE(review): presumably this leaves just the date part of an
          # ISO-style timestamp - confirm against a real API response.
          - replace:
              - regex: "T.+"
                with: ''
      Image: &storiesImage
        # The cover URL is built from the datetime value rather than read
        # from the response.
        selector: "0.magazine_datetime"
        postProcess:
          - replace:
              - regex: "T.+"
                with: ''
          # Join the three hyphen-separated digit groups and insert them
          # into the cover file name.
          - replace:
              - regex: '(\d+)-(\d+)-(\d+)'
                # wide cover
                with: "https://s5q3w2t8.ssl.hwcdn.net/production/$1$2$3-magazine-cover-wide-2560.jpg"
                # normal cover (alternative, disabled):
                #with: "https://s5q3w2t8.ssl.hwcdn.net/production/$1$2$3-magazine-cover-640.jpg"
      Performers: &storiesPerformers
        Name:
          selector: "0.magazine_simple_title"
          postProcess:
            # Wrap the selected value into ".../api/magazines/<value>/models".
            - replace:
                - regex: "^"
                  with: "https://www.watch4beauty.com/api/magazines/"
                - regex: "$"
                  with: "/models"
            # Query that URL and join every model_nickname with ","
            - subScraper:
                selector: "0.Models.#.model_nickname"
                concat: ','
          # ... then split the joined string back into individual names.
          split: ','
      Studio: &studioAttr
        Name:
          fixed: Watch4Beauty
      Tags: &storiesTags
        Name:
          selector: "0.magazine_tags"
          split: ', '

  # ---- Updates (issue_* fields) ----------------------------------------
  sceneUpdatesScraper:
    scene:
      Title: &updatesTitle "0.issue_title"
      Details: &updatesDetails "0.issue_text"
      Date: &updatesDate
        selector: "0.issue_datetime"
        postProcess:
          # Same trimming as for stories: remove the first "T" and the rest.
          - replace:
              - regex: "T.+"
                with: ''
      Image:
        # Cover URL derived from the datetime value, as for stories.
        selector: "0.issue_datetime"
        postProcess:
          - replace:
              - regex: "T.+"
                with: ''
          - replace:
              - regex: '(\d+)-(\d+)-(\d+)'
                # wide cover
                with: "https://s5q3w2t8.ssl.hwcdn.net/production/$1$2$3-issue-cover-wide-2560.jpg"
                # normal cover (alternative, disabled):
                #with: "https://s5q3w2t8.ssl.hwcdn.net/production/$1$2$3-issue-cover-640.jpg"
      Performers: &updatesPerformers
        Name:
          selector: "0.issue_simple_title"
          postProcess:
            # Wrap the selected value into ".../api/issues/<value>/models".
            - replace:
                - regex: "^"
                  with: "https://www.watch4beauty.com/api/issues/"
                - regex: "$"
                  with: "/models"
            - subScraper:
                selector: "0.Models.#.model_nickname"
                concat: ','
          split: ','
      Studio: *studioAttr
      Tags: &updatesTags
        Name:
          selector: "0.issue_tags"
          split: ', '

  # ---- Galleries: same fields as the scenes above, minus Image ----------
  galleryStoriesScraper:
    gallery:
      Title: *storiesTitle
      Details: *storiesDetails
      Date: *storiesDate
      Performers: *storiesPerformers
      Studio: *studioAttr
      Tags: *storiesTags

  galleryUpdatesScraper:
    gallery:
      Title: *updatesTitle
      Details: *updatesDetails
      Date: *updatesDate
      Performers: *updatesPerformers
      Studio: *studioAttr
      Tags: *updatesTags
|
|
# Last Updated November 12, 2021
|