# Listing metadata carried over with this file: 89 lines, 2.7 KiB, YAML, executable file.
# Name of this scraper definition.
name: LucasEntertainment
# Scene lookups: the URL patterns listed here are handled by the
# `sceneScraper` definition through the scrapeXPath action.
sceneByURL:
  - action: scrapeXPath
    url:
      - lucasentertainment.com/scenes/
      - lucasentertainment.com/tour/scenes/
    scraper: sceneScraper
# Movie lookups: the URL patterns listed here are handled by the
# `movieScraper` definition through the scrapeXPath action.
movieByURL:
  - action: scrapeXPath
    url:
      - lucasentertainment.com/movies/
      - lucasentertainment.com/tour/movies/
    scraper: movieScraper
# Performer lookups: the URL patterns listed here are handled by the
# `performerScraper` definition through the scrapeXPath action.
performerByURL:
  - action: scrapeXPath
    url:
      - lucasentertainment.com/models/
      - lucasentertainment.com/tour/models/
    scraper: performerScraper

# XPath-based field mappings for the three scrapers referenced by the
# *ByURL sections. YAML anchors declared in sceneScraper (&titleSel, &dateSel,
# &detailsAttr, &studioAttr) are reused by alias in the scrapers that follow,
# so sceneScraper must stay first.
xPathScrapers:
  sceneScraper:
    common:
      # Shared prefix: the <a> siblings that follow the "Performers:" label.
      $performer: //strong[text() = "Performers:"]/following-sibling::a
    scene:
      # Anchored as &titleSel; aliased by movieScraper and performerScraper.
      Title: &titleSel //h2/text()
      Date:
        # Anchored as &dateSel; aliased by movieScraper.
        selector: &dateSel //strong[text() = "Release Date:"]/following-sibling::text()[1]
        postProcess:
          # Drop the lowercase letters that directly follow the first number
          # (presumably an ordinal suffix such as "th") before parsing.
          - replace:
              - regex: (^[a-zA-Z]+\s[0-9]+)[a-z]+(\s[0-9]+$)
                with: $1$2
          - parseDate: January 2 2006
      Performers:
        Name: $performer/text()
        URL: $performer/@href
      # Anchored as &detailsAttr; aliased as the movie Synopsis.
      Details: &detailsAttr
        selector: //div[@class="container"]//p[@class="plain-link"]/following-sibling::p/text()
        concat: "\n\n"
      Image:
        selector: //script[contains(text(), "image:")]/text()
        postProcess:
          # Keep only the single-quoted value that follows "image:".
          - replace:
              - regex: "^.*image: '([^']+)'.*$"
                with: $1
      # Anchored as &studioAttr; aliased as the movie Studio.
      Studio: &studioAttr
        Name:
          fixed: Lucas Entertainment
      Movies:
        Name: //strong[text() = "From:"]/following-sibling::a[1]/text()
        URL: //strong[text() = "From:"]/following-sibling::a[1]/@href
  movieScraper:
    movie:
      Name: *titleSel
      Date:
        selector: *dateSel
        postProcess:
          # NOTE(review): same selector as the scene date, but a different
          # layout and no suffix stripping - confirm movie pages use this format.
          - parseDate: Jan 02, 2006
      Duration:
        selector: //strong[text() = "Runtime:"]/following-sibling::text()[1]
        postProcess:
          # Rewrite "<N> minutes" as "0:<N>:00".
          - replace:
              - regex: "^[^0-9]*([0-9]+)\\s+minutes"
                with: "0:$1:00"
          # A value containing "N/A" is replaced using an empty `with`.
          - replace:
              - regex: "^.*N/A.*$"
                with:
      Synopsis: *detailsAttr
      Studio: *studioAttr
      FrontImage: //img[@class="movie-front-cover main-photo"]/@src
  performerScraper:
    performer:
      Name: *titleSel
      Gender:
        fixed: Male
      Measurements:
        selector: //strong[text() = "Dick Size:"]/following-sibling::text()[1]
        postProcess:
          # Prefix the whole value with 0' before feetToCm runs - presumably so
          # it is read as zero feet plus the scraped inches; confirm.
          - replace:
              - regex: (^.*$)
                with: 0'$1
          - feetToCm: true
      Height:
        selector: //strong[text() = "Height:"]/following-sibling::text()[1]
        postProcess:
          - feetToCm: true
      Image: //div[@class="col-sm-5 col-md-5 col-lg-4 model-main-photo"]/img/@data-original
# Last Updated January 07, 2023