# XPath scraper definition for kristenbjorn.com.
# Three URL-triggered entry points (scene, movie, performer) are each routed
# to a named scraper under `xPathScrapers`.
name: "KristenBjorn"

# --- URL routing -----------------------------------------------------------
# Each entry lists the URL fragments that select the scraper named in `scraper`.
sceneByURL:
  - action: scrapeXPath
    url:
      - kristenbjorn.com/video
    scraper: sceneScraper
movieByURL:
  - action: scrapeXPath
    url:
      - kristenbjorn.com/web/model/store/
    scraper: movieScraper
performerByURL:
  - action: scrapeXPath
    url:
      - kristenbjorn.com/gay-porn-star/
      - kristenbjorn.com/web/model/gay-porn-star/
    scraper: performerScraper

xPathScrapers:
  # --- Scene pages ---------------------------------------------------------
  sceneScraper:
    common:
      # Shared prefix for the performer anchors; reused by Performers.Name and
      # Performers.URL below.
      $performer: //div[@class="scene-item"]/a[contains(@class,"scene-box")]
    scene:
      Title: //h1[@class="StoreHeaderNew"]/text()
      Details: //div[@class="med-text"]/p
      Code:
        # The code is the number from the ".../video-<digits>/..." segment of
        # the og:url meta tag.
        selector: //meta[@property="og:url"]/@content
        postProcess:
          - replace:
              - regex: ^.*\/video-(\d+)\/.*$
                with: $1
      Date:
        selector: //div[@id="main"]//div[@class="date"][1]/text()
        postProcess:
          # Go-style reference-time layout: day, abbreviated month, 4-digit year.
          - parseDate: 02 Jan 2006
      Tags:
        Name:
          selector: //a[contains(@title,"Categorie:")]//text()
          postProcess:
            - replace:
                # Drop the trailing " (<count>)" from the category label.
                - regex: (.*)\s\(\d+\)
                  with: $1
      Performers:
        Name: $performer//h5/text()
        URL:
          selector: $performer/@href
          # Anchor `appendDomain`: prefixes the value with the site origin.
          # Reused by the movie cover images via the *appendDomain alias.
          postProcess: &appendDomain
            - replace:
                - regex: ^
                  with: http://kristenbjorn.com
      # Anchor `imageAttr`: og:image meta tag; reused by the performer scraper.
      Image: &imageAttr //meta[@property="og:image"]/@content
      Studio:
        Name:
          fixed: Kristen Bjorn

  # --- Movie (store) pages -------------------------------------------------
  movieScraper:
    movie:
      Name: //article/div[1]/span/text()
      Date:
        selector: //span[text()="Release Date:"]/following-sibling::text()
        postProcess:
          - replace:
              # Shorten a four-digit 20xx year to two digits,
              # e.g. 1/05/2022 -> 1/05/22.
              - regex: /20([0-9])
                with: /$1
          # Layout "1/2/06" accepts month and day with or without a leading
          # zero, so 1/05/22, 01/05/22, 1/5/22 and 11/5/22 all parse. The
          # previous zero-padding regex only covered the "d/dd/yy" shape and
          # left single-digit days unparseable under "01/02/06".
          - parseDate: 1/2/06
      Duration:
        selector: //span[text() = "Running Time:"]/following-sibling::text()
        postProcess:
          # Convert a textual running time into [h:]mm:ss. `\D*` (rather than
          # `\D+`) lets compact forms such as "2h", "90m" and "1h30m" match as
          # well as the spelled-out "2 hours" / "90 minutes" forms.
          - replace:
              - regex: ^(\d+)\s*(?i:h)\D*$ # movies with only hours
                with: $1:00:00
              - regex: ^(\d+)\s*(?i:m)\D*$ # movies with only minutes
                with: "$1:00"
              - regex: ^(\d+)\s*(?i:h)\D*(\d+)\s*(?i:m)\D*$ # movies with hours and minutes
                with: "$1:$2:00"
      Synopsis: //div[@class="more"]
      Director: //span[text()="Director:"]/following-sibling::text()
      Studio:
        Name: //span[text()="Studio:"]/following-sibling::text()
      FrontImage:
        selector: //div[@id="imagen0"]//img/@src
        postProcess: *appendDomain
      BackImage:
        selector: //div[@id="imagen1"]//img/@src
        postProcess: *appendDomain

  # --- Performer pages -----------------------------------------------------
  performerScraper:
    performer:
      Name:
        selector: //h1/text()
        postProcess:
          - replace:
              # Strip the "Gay Porn Star: " heading prefix, keeping the name.
              - regex: "^Gay Porn Star: (.*)$"
                with: $1
      Gender:
        fixed: Male
      Country:
        selector: //h2[contains(text(),"Nationality:")]/following-sibling::text()[1]
      Height:
        selector: //h2[contains(text(),"Height:")]/following-sibling::text()[1]
        postProcess:
          - feetToCm: true
      Weight:
        selector: //h2[contains(text(),"Weight:")]/following-sibling::text()[1]
        postProcess:
          - replace:
              # Keep only the leading number before unit conversion.
              - regex: ^(\d+).+
                with: $1
          - lbToKg: true
      Measurements:
        selector: //h2[contains(text(), "Dick:")]/following-sibling::text()[1]
        postProcess:
          - replace:
              # Keep the value inside "(<value> cms)", dropping the leading token.
              - regex: '[^\s]+ \((.*) cms\).*$'
                with: $1
      Image: *imageAttr
# Last Updated December 23, 2022