# Scraper definition for pornhub.com.
# The top-level sections map lookup kinds (by name, by URL, by file fragment)
# to one of the XPath scraper definitions declared under `xPathScrapers`.
name: Pornhub

# ---------------------------------------------------------------------------
# Entry points
# ---------------------------------------------------------------------------

# Performer search; `{}` is presumably substituted with the search term
# by the consuming application -- confirm against its scraper docs.
performerByName:
  action: scrapeXPath
  queryURL: https://www.pornhub.com/pornstars/search?search={}
  scraper: performerSearch

# Any URL containing "pornhub.com" is handled by the performer page scraper.
performerByURL:
  - action: scrapeXPath
    url:
      - pornhub.com
    scraper: performerScraper

# Video pages are recognised by the `view_video.php?viewkey=` URL fragment.
sceneByURL:
  - action: scrapeXPath
    url:
      - pornhub.com/view_video.php?viewkey=
    scraper: sceneScraper

# Builds a video page URL from the `{filename}` placeholder after the
# replacements below have been applied to it.
sceneByFragment:
  action: scrapeXPath
  queryURL: https://www.pornhub.com/view_video.php?viewkey={filename}
  queryURLReplace:
    filename:
      # Keep only a 13-character alphanumeric id (optionally prefixed "ph").
      - regex: (?:.*[^a-zA-Z\d])?((?:ph)?(?:[a-zA-Z\d]{13})).+
        with: $1
      # if no ph id is found in the filename,
      # clear the filename so that it doesn't leak to ph
      - regex: .*\.[^\.]+$
        with:
  scraper: sceneScraper

# Scene search; `{}` is presumably substituted with the search term.
sceneByName:
  action: scrapeXPath
  queryURL: https://www.pornhub.com/video/search?search={}
  scraper: sceneSearch

# Scrapes the page whose address is supplied through the `{url}` placeholder.
sceneByQueryFragment:
  action: scrapeXPath
  queryURL: "{url}"
  scraper: sceneScraper

# ---------------------------------------------------------------------------
# XPath scraper definitions
# ---------------------------------------------------------------------------
xPathScrapers:
  # Video search results page.
  sceneSearch:
    common:
      # Result list, excluding the list whose id is "bottomVideos".
      $searchItem: //ul[contains(@class, "search-video-thumbs") and not(@id="bottomVideos")]
      # Title link of each result inside that list.
      $searchThumb: //ul[contains(@class, "search-video-thumbs") and not(@id="bottomVideos")]//div[contains(@class, "thumbnail-info-wrapper")]/span[@class="title"]/a
    scene:
      Title: $searchThumb/text()
      URL:
        selector: $searchThumb/@href
        postProcess:
          # Prefix the scraped href with the site origin.
          - replace:
              - regex: ^
                with: "https://www.pornhub.com"
      Image:
        selector: $searchItem//div[contains(@class, "phimage")]//img/@data-mediumthumb

  # Performer search results page.
  performerSearch:
    performer:
      Name: //div[@class="wrap"]/div[@class="thumbnail-info-wrapper"]/a[@class="title"]/text()
      URL:
        selector: //div[@class="wrap"]/div[@class="thumbnail-info-wrapper"]/a[@class="title"]/@href
        postProcess:
          # Prefix the scraped href with the site origin.
          - replace:
              - regex: ^
                with: "https://www.pornhub.com"

  # Performer profile page.
  performerScraper:
    common:
      $infoPiece: //div[@class="infoPiece"]
      $infoContainer: //div[@class="infoContainer"]
      # Relative step (no leading slashes); always appended after $infoPiece below.
      $smallInfo: span[@class="smallInfo"]
    performer:
      Name: //h1[@itemprop="name"]|$infoContainer//h1
      Birthdate:
        selector: //span[@itemprop="birthDate"]|$infoPiece[contains(span,"Born:")]/text()
        postProcess:
          # Two date layouts are listed; these look like Go reference-time
          # layouts -- confirm how the consumer applies consecutive parseDate steps.
          - parseDate: Jan 2, 2006
          - parseDate: 2006-01-02
      Country:
        selector: $infoPiece[contains(span,"Birthplace:")]/text()|$infoPiece[contains(span,"City and Country:")]/$smallInfo|$infoPiece[contains(span,"Birth Place:")]/$smallInfo
        postProcess:
          # Keep only the text after the last comma.
          - replace:
              - regex: .+,\s?([^,]+$)
                with: $1
          # Normalise the two US spellings to "USA".
          - map:
              US: "USA"
              United States of America: "USA"
      Gender: $infoPiece[contains(span,"Gender:")]/$smallInfo
      Twitter: //ul[contains(@class,"socialList")]//a[contains(@href,"twitter.com/")]/@href
      Instagram: //ul[contains(@class,"socialList")]//a[contains(@href,"instagram.com/")]/@href
      Measurements: $infoPiece[contains(span,"Measurements:")]/$smallInfo|$infoPiece[contains(span,"Measurements:")]/text()
      Weight:
        selector: $infoPiece[contains(span,"Weight:")]/$smallInfo|$infoPiece[contains(span,"Weight:")]/text()
        postProcess:
          # Extract the number from a parenthesised "(NN kg)" suffix.
          - replace:
              - regex: .*\((\d+)\s*kg\)
                with: $1
      Height:
        selector: $infoPiece[contains(span,"Height:")]/$smallInfo|$infoPiece[contains(span,"Height:")]/text()
        postProcess:
          # Extract the number from a parenthesised "(NN cm)" suffix.
          - replace:
              - regex: .*\((\d+)\s*cm\)
                with: $1
      Details: //div[@itemprop="description" or starts-with(@class,"text longBio")]
      Ethnicity: $infoPiece[contains(span,"Ethnicity:")]/$smallInfo|$infoPiece[contains(span,"Ethnicity:")]/text()
      FakeTits: $infoPiece[contains(span,"Fake Boobs:")]/$smallInfo
      Piercings: $infoPiece[contains(span,"Piercings:")]/$smallInfo
      Tattoos: $infoPiece[contains(span,"Tattoos:")]/$smallInfo
      HairColor: $infoPiece[contains(span,"Hair Color:")]/$smallInfo
      CareerLength:
        selector: $infoPiece[contains(span,"Career Start and End:")]/$smallInfo
        postProcess:
          # Collapse the " to " separator into a hyphen.
          - replace:
              - regex: \s+to\s+
                with: "-"
      URL: //link[@rel="canonical"][1]/@href
      Image: //div[@class="thumbImage"]/img/@src|//img[@id="getAvatar"]/@src

  # Video page.
  sceneScraper:
    common:
      # Text of the script element that mentions 'VideoObject'.
      $datablob: //script[contains(., 'VideoObject')]/text()
      $videowrap: //div[@class="video-wrapper"]
    scene:
      Title: //meta[@property="og:title"]/@content
      URL: //meta[@property="og:url"]/@content
      Date:
        selector: $datablob
        postProcess:
          # Capture the "uploadDate" value up to (not including) the first "T".
          - replace:
              - regex: .+(?:"uploadDate"\s*:\s*")([^T]+).+
                with: $1
          - parseDate: "2006-01-02"
      Tags:
        # Union of the category links and the tag links.
        Name: $videowrap//div[contains(concat(" ",normalize-space(@class)," ")," categoriesWrapper ")]/a/text()|$videowrap//div[contains(concat(" ",normalize-space(@class)," ")," tagsWrapper ")]/a/text()
      Performers:
        Name: $videowrap//div[contains(concat(" ",normalize-space(@class)," ")," pornstarsWrapper ")]/a/@data-mxptext
      Image: //meta[@property="og:image"]/@content
      Studio:
        Name: $videowrap//div[contains(concat(" ",normalize-space(@class)," ")," usernameWrap ")]//a/text()

# ---------------------------------------------------------------------------
# Request driver settings
# ---------------------------------------------------------------------------
driver:
  cookies:
    # Sends accessAgeDisclaimerPH=1 for .pornhub.com; presumably this skips
    # the site's age-disclaimer interstitial -- confirm.
    - CookieURL: "https://www.pornhub.com"
      Cookies:
        - Name: "accessAgeDisclaimerPH"
          Domain: ".pornhub.com"
          Value: "1"
          Path: "/"
# Last Updated September 26, 2023