# XPath scraper definition for adultempire.com / adultdvdempire.com.
# The top-level *By* keys route a URL or search query to one of the named
# scrapers declared under `xPathScrapers` below.
name: AdultEmpire

# Scene URLs: clip pages (URL contains "/clip") are listed first so they are
# handled by the single-clip scraper; every other URL on the two domains goes
# to the generic (movie-page based) scene scraper.
sceneByURL:
  - action: scrapeXPath
    url:
      - adultdvdempire.com/clip
      - adultempire.com/clip
    scraper: sceneScraperSingle
  - action: scrapeXPath
    url:
      - adultdvdempire.com
      - adultempire.com
    scraper: sceneScraper

movieByURL:
  - action: scrapeXPath
    url:
      - adultdvdempire.com
      - adultempire.com
    scraper: movieScraper

# `{}` in queryURL is the placeholder for the search term.
performerByName:
  action: scrapeXPath
  queryURL: https://www.adultempire.com/performer/search?q={}
  scraper: performerSearch

performerByURL:
  - action: scrapeXPath
    url:
      - adultempire.com
      - adultdvdempire.com
    scraper: performerScraper

sceneByName:
  action: scrapeXPath
  # if needed replace `dvd` with `vod`
  queryURL: https://www.adultdvdempire.com/dvd/search?q={}
  scraper: sceneSearch

# Re-scrapes a search result by fetching the URL that sceneSearch returned.
sceneByQueryFragment:
  action: scrapeXPath
  queryURL: "{url}"
  scraper: sceneScraper

xPathScrapers:
  # Performer search results page.
  performerSearch:
    common:
      $perfomerRoot: //div[@id="performerlist"]//a
    performer:
      Name: $perfomerRoot/@label
      URL:
        selector: $perfomerRoot/@href
        postProcess:
          # hrefs are site-relative; prefix the host to make them absolute.
          - replace:
              - regex: ^
                with: https://www.adultempire.com

  # Scene (DVD) search results page.
  sceneSearch:
    scene:
      Title:
        # Collects both the @href and @title of every result link, joins them
        # with "|", rewrites each "href|title" pair to "title (id)" using the
        # numeric id from the href, then splits back into one title per result.
        selector: //div[@class="item-title"]/a/@*[local-name()="href" or local-name()="title"]
        concat: "|"
        postProcess:
          - replace:
              - regex: '/(\d+)/[^|]+\|([^|]+)'
                with: "$2 ($1)"
        split: "|"
      URL:
        selector: //div[@id="content"]//div[@class="item-title"]/a/@href
        postProcess:
          - replace:
              - regex: ^
                with: "https://www.adultdvdempire.com"
      Image:
        selector: //a[@class="boxcover"]/img/@data-src

  # Movie detail page.
  movieScraper:
    movie:
      Name: //h1/text()
      Director: //a[@label="Director"]/text()
      Duration:
        selector: //small[contains(text(), "Length")]/following-sibling::text()
        postProcess:
          # Turns "<h> hrs. <m> mins." into "<h>:<m>:00".
          - replace:
              - regex: " hrs. "
                with: ":"
              - regex: " mins."
                with: ":00"
      Date:
        selector: //small[contains(text(), "Released")]/following-sibling::text()
        postProcess:
          - parseDate: Jan 02 2006
      Synopsis:
        selector: //div[contains(@class,"synopsis-content")]//text()
        concat: " "
      Studio:
        Name: //a[@label="Studio"]/text()
      FrontImage: //a[@id="front-cover"]/@data-href
      BackImage: //a[@id="back-cover"]/@href
      # Rating is not yet implemented in the UX
      # Rating:
      #   selector: //span[@class='rating-stars-avg']/text()
      #   postProcess:
      #     - replace:
      #       - regex: (\d).+
      #         with: $1
      URL: //meta[@name='og:url']/@content

  # Single clip page (URLs containing "/clip").
  sceneScraperSingle:
    common:
      $header: //div[@class="clip-page__detail__title text-display-primary"]
      $clipId: (//*[@data-tid]/@data-tid)[1]
    scene:
      Title: $header/h1
      Studio:
        Name: //div[contains(text(), "by")]/a
      Movies:
        Name: //div[contains(text(), "from")]/a
      Date:
        selector: //strong[contains(text(), "Released")]/following-sibling::text()
        postProcess:
          - parseDate: Jan 02 2006
      Tags:
        Name: //strong[contains(text(), "Attributes")]/following-sibling::a/text()
      Performers:
        Name: //strong[contains(text(), "Starring")]/following-sibling::a/text()
        URL:
          selector: //strong[contains(text(), "Starring")]/following-sibling::a/@href
          postProcess:
            - replace:
                - regex: ^
                  with: "https://www.adultdvdempire.com"
      Image:
        # Joins the first data-tid value and the front-cover image src, then
        # rebuilds a backdrop image URL from the two captured pieces
        # ($1 = leading digits of the joined string, $2 = trailing path segment).
        selector: (//*[@data-tid]/@data-tid)[1] | //a[@id="front-cover"]/img/@src
        concat: __SEPARATOR__
        postProcess:
          - replace:
              - regex: (\d+).*/([^/]*\d+)[^/\d]*$
                with: https://imgs1cdn.adultempire.com/backdrop/6000/$2%5f$1/scene-1.jpg

  # Generic scene scraper: reads scene data from a movie detail page.
  sceneScraper:
    scene:
      Title: //h1/text()
      Details:
        selector: //div[contains(@class,"synopsis-content")]//text()
        concat: " "
      Date:
        selector: //small[contains(text(), "Released")]/following-sibling::text()
        postProcess:
          - parseDate: Jan 02 2006
      Director: //a[@label="Director"]/text()
      Image: //a[@id="front-cover"]/@data-href
      Studio:
        Name: //a[@label="Studio"]/text()
      Movies:
        Name: //h1/text()
        URL: //link[@rel="canonical"]/@href
      Tags:
        Name: //div[h2[contains(.,'Categories')]]//a[@label="Category"]/text()
      Performers:
        Name: //a[@label="Performer"]//text()
        URL:
          selector: //a[@label="Performer"]/@href
          postProcess:
            - replace:
                - regex: ^
                  with: "https://www.adultdvdempire.com"
      URL: //meta[@name='og:url']/@content

  # Performer profile page.
  performerScraper:
    common:
      # List of "Label: value" <li> items inside the profile modal.
      $infoPiece: //*[@id="profileModal"]/div/div/div[2]/div[1]/ul
    performer:
      Name: //*[@id="content"]/section/div/div[2]/h1/text()
      Birthdate:
        selector: $infoPiece/li[contains(text(), 'Born:')]/text()
        postProcess:
          - replace:
              - regex: Born:\s+(.*)
                with: $1
      Height:
        selector: $infoPiece/li[contains(text(), 'Height:')]/text()
        postProcess:
          - replace:
              - regex: Height:\s+(.*)
                with: $1
          - feetToCm: true
      # NOTE(review): the second union branch `/img/@src` is an absolute path
      # from the document root, and the first branch selects the <a> element
      # itself; this may have been meant as `a[1]/img/@src` - confirm against
      # the live page before changing.
      Image: //*[@id="content"]/section/div/div[1]/a[1]|/img/@src|//*[@id="content"]/section/div/div[1]/img/@src|//section[@class="container-fluid"]//a[@class="fancy"][@label="Headshot"]/@href
      Country:
        # Must reference the $infoPiece fragment exactly; a misspelled
        # fragment name is not expanded correctly and the selector matches nothing.
        selector: $infoPiece/li[contains(text(), 'From:')]/text()
        postProcess:
          - replace:
              - regex: From:\s+(.*)
                with: $1
      Measurements:
        selector: $infoPiece/li[contains(text(), 'Measurements:')]/text()
        postProcess:
          # Normalises to "<bust>-<waist>-<hips>".
          - replace:
              - regex: Measurements:\s+(\d\d\w*)\D+(\d+)\D+(\d+).*
                with: $1-$2-$3
      Aliases:
        selector: //*[@id="content"]/section/div/div[2]/div[contains(text(), "Alias:")]
        concat: ", "
        postProcess:
          - replace:
              - regex: "Alias: (.*)"
                with: $1
      Details: //*[@id="content"]/section/div/div[5]/aside/text()
      URL: //link[@rel='canonical']/@href
# Last Updated February 22, 2024