# File viewer metadata: YAML, 118 lines, 3.6 KiB, executable file.
# Name of this scraper definition.
name: KristenBjorn
# Scene lookups by URL: pages matching the fragment below are scraped with
# the XPath scraper registered as "sceneScraper".
sceneByURL:
  - action: scrapeXPath
    scraper: sceneScraper
    url:
      - "kristenbjorn.com/video"
# Movie lookups by URL: store pages matching the fragment below are scraped
# with the XPath scraper registered as "movieScraper".
movieByURL:
  - action: scrapeXPath
    scraper: movieScraper
    url:
      - "kristenbjorn.com/web/model/store/"
# Performer lookups by URL: both profile URL forms listed below are scraped
# with the XPath scraper registered as "performerScraper".
performerByURL:
  - action: scrapeXPath
    scraper: performerScraper
    url:
      - "kristenbjorn.com/gay-porn-star/"
      - "kristenbjorn.com/web/model/gay-porn-star/"

# XPath scraper definitions referenced by the *ByURL entries.
#
# Two YAML anchors are defined in sceneScraper and reused further down:
#   &appendDomain - post-process list that prefixes a value with the site domain
#   &imageAttr    - selector for the page's og:image meta content
xPathScrapers:
  sceneScraper:
    common:
      # Anchor element of each performer box on a scene page; reused as the
      # base of the Performers selectors below.
      $performer: //div[@class="scene-item"]/a[contains(@class,"scene-box")]
    scene:
      Title: //h1[@class="StoreHeaderNew"]/text()
      Details: //div[@class="med-text"]/p
      Code:
        # The code is the number that follows "video-" in the og:url value.
        selector: //meta[@property="og:url"]/@content
        postProcess:
          - replace:
              - regex: ^.*\/video-(\d+)\/.*$
                with: $1
      Date:
        selector: //div[@id="main"]//div[@class="date"][1]/text()
        postProcess:
          # Go reference layout: two-digit day, abbreviated month, four-digit year.
          - parseDate: 02 Jan 2006
      Tags:
        Name:
          selector: //a[contains(@title,"Categorie:")]//text()
          postProcess:
            - replace:
                # Drop the trailing " (N)" counter from the category label.
                - regex: (.*)\s\(\d+\)
                  with: $1
      Performers:
        Name: $performer//h5/text()
        URL:
          selector: $performer/@href
          # Prefix the href with the site domain (the empty match at ^ is
          # replaced, so the domain is inserted in front of the value).
          postProcess: &appendDomain
            - replace:
                - regex: ^
                  with: http://kristenbjorn.com
      Image: &imageAttr //meta[@property="og:image"]/@content
      Studio:
        Name:
          fixed: Kristen Bjorn
  movieScraper:
    movie:
      Name: //article/div[1]/span/text()
      Date:
        selector: //span[text()="Release Date:"]/following-sibling::text()
        postProcess:
          - replace:
              # Reduce a four-digit year (19xx or 20xx) to its last two digits
              # so one layout covers both long and short years.
              - regex: '/(?:19|20)(\d\d)'
                with: /$1
          # Go layout "1/2/06" accepts one- or two-digit month and day, so
          # values such as 1/5/15 and 12/5/15 parse without zero-padding.
          - parseDate: 1/2/06
      Duration:
        selector: //span[text() = "Running Time:"]/following-sibling::text()
        postProcess:
          # Rewrite the running-time text into colon-separated form; each
          # rule only fires on the shape named in its trailing comment.
          - replace:
              - regex: ^(\d+)\s*(?i:h)\D+$ # movies with only hours
                with: $1:00:00
              - regex: ^(\d+)\s*(?i:m)\D+$ # movies with only minutes
                with: "$1:00"
              - regex: ^(\d+)\s*(?i:h)\D+(\d+)\s*(?i:m)\D+$ # movies with hours and minutes
                with: "$1:$2:00"
      Synopsis: //div[@class="more"]
      Director: //span[text()="Director:"]/following-sibling::text()
      Studio:
        Name: //span[text()="Studio:"]/following-sibling::text()
      FrontImage:
        selector: //div[@id="imagen0"]//img/@src
        postProcess: *appendDomain
      BackImage:
        selector: //div[@id="imagen1"]//img/@src
        postProcess: *appendDomain
  performerScraper:
    performer:
      Name:
        selector: //h1/text()
        postProcess:
          - replace:
              # Remove the "Gay Porn Star: " heading prefix, keeping the name.
              - regex: "^Gay Porn Star: (.*)$"
                with: $1
      Gender:
        fixed: Male
      Country:
        selector: //h2[contains(text(),"Nationality:")]/following-sibling::text()[1]
      Height:
        selector: //h2[contains(text(),"Height:")]/following-sibling::text()[1]
        postProcess:
          # NOTE(review): presumably the site lists height in feet/inches - confirm.
          - feetToCm: true
      Weight:
        selector: //h2[contains(text(),"Weight:")]/following-sibling::text()[1]
        postProcess:
          - replace:
              # Keep only the leading number before converting.
              - regex: ^(\d+).+
                with: $1
          - lbToKg: true
      Measurements:
        selector: //h2[contains(text(), "Dick:")]/following-sibling::text()[1]
        postProcess:
          - replace:
              # Keep the value inside "(... cms)" and drop the rest.
              - regex: '[^\s]+ \((.*) cms\).*$'
                with: $1
      Image: *imageAttr
# Last Updated December 23, 2022