stash
This commit is contained in:
143
stash/config/scrapers/community/Pornhub/Pornhub.yml
Normal file
143
stash/config/scrapers/community/Pornhub/Pornhub.yml
Normal file
@@ -0,0 +1,143 @@
|
||||
name: Pornhub
|
||||
performerByName:
|
||||
action: scrapeXPath
|
||||
queryURL: https://www.pornhub.com/pornstars/search?search={}
|
||||
scraper: performerSearch
|
||||
performerByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- pornhub.com
|
||||
scraper: performerScraper
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- pornhub.com/view_video.php?viewkey=
|
||||
scraper: sceneScraper
|
||||
sceneByFragment:
|
||||
action: scrapeXPath
|
||||
queryURL: https://www.pornhub.com/view_video.php?viewkey={filename}
|
||||
queryURLReplace:
|
||||
filename:
|
||||
- regex: (?:.*[^a-zA-Z\d])?((?:ph)?(?:[a-zA-Z\d]{13})).+
|
||||
with: $1
|
||||
- regex: .*\.[^\.]+$ # if no ph id is found in the filename
|
||||
with: # clear the filename so that it doesn't leak to ph
|
||||
scraper: sceneScraper
|
||||
|
||||
sceneByName:
|
||||
action: scrapeXPath
|
||||
queryURL: https://www.pornhub.com/video/search?search={}
|
||||
scraper: sceneSearch
|
||||
sceneByQueryFragment:
|
||||
action: scrapeXPath
|
||||
queryURL: "{url}"
|
||||
scraper: sceneScraper
|
||||
|
||||
xPathScrapers:
|
||||
sceneSearch:
|
||||
common:
|
||||
$searchItem: //ul[contains(@class, "search-video-thumbs") and not(@id="bottomVideos")]
|
||||
$searchThumb: //ul[contains(@class, "search-video-thumbs") and not(@id="bottomVideos")]//div[contains(@class, "thumbnail-info-wrapper")]/span[@class="title"]/a
|
||||
|
||||
scene:
|
||||
Title: $searchThumb/text()
|
||||
URL:
|
||||
selector: $searchThumb/@href
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ^
|
||||
with: "https://www.pornhub.com"
|
||||
Image:
|
||||
selector: $searchItem//div[contains(@class, "phimage")]//img/@data-mediumthumb
|
||||
performerSearch:
|
||||
performer:
|
||||
Name: //div[@class="wrap"]/div[@class="thumbnail-info-wrapper"]/a[@class="title"]/text()
|
||||
URL:
|
||||
selector: //div[@class="wrap"]/div[@class="thumbnail-info-wrapper"]/a[@class="title"]/@href
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ^
|
||||
with: "https://www.pornhub.com"
|
||||
performerScraper:
|
||||
common:
|
||||
$infoPiece: //div[@class="infoPiece"]
|
||||
$infoContainer: //div[@class="infoContainer"]
|
||||
$smallInfo: span[@class="smallInfo"]
|
||||
|
||||
performer:
|
||||
Name: //h1[@itemprop="name"]|$infoContainer//h1
|
||||
Birthdate:
|
||||
selector: //span[@itemprop="birthDate"]|$infoPiece[contains(span,"Born:")]/text()
|
||||
postProcess:
|
||||
- parseDate: Jan 2, 2006
|
||||
- parseDate: 2006-01-02
|
||||
Country:
|
||||
selector: $infoPiece[contains(span,"Birthplace:")]/text()|$infoPiece[contains(span,"City and Country:")]/$smallInfo|$infoPiece[contains(span,"Birth Place:")]/$smallInfo
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: .+,\s?([^,]+$)
|
||||
with: $1
|
||||
- map:
|
||||
US: "USA"
|
||||
United States of America: "USA"
|
||||
Gender: $infoPiece[contains(span,"Gender:")]/$smallInfo
|
||||
Twitter: //ul[contains(@class,"socialList")]//a[contains(@href,"twitter.com/")]/@href
|
||||
Instagram: //ul[contains(@class,"socialList")]//a[contains(@href,"instagram.com/")]/@href
|
||||
Measurements: $infoPiece[contains(span,"Measurements:")]/$smallInfo|$infoPiece[contains(span,"Measurements:")]/text()
|
||||
Weight:
|
||||
selector: $infoPiece[contains(span,"Weight:")]/$smallInfo|$infoPiece[contains(span,"Weight:")]/text()
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: .*\((\d+)\s*kg\)
|
||||
with: $1
|
||||
Height:
|
||||
selector: $infoPiece[contains(span,"Height:")]/$smallInfo|$infoPiece[contains(span,"Height:")]/text()
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: .*\((\d+)\s*cm\)
|
||||
with: $1
|
||||
Details: //div[@itemprop="description" or starts-with(@class,"text longBio")]
|
||||
Ethnicity: $infoPiece[contains(span,"Ethnicity:")]/$smallInfo|$infoPiece[contains(span,"Ethnicity:")]/text()
|
||||
FakeTits: $infoPiece[contains(span,"Fake Boobs:")]/$smallInfo
|
||||
Piercings: $infoPiece[contains(span,"Piercings:")]/$smallInfo
|
||||
Tattoos: $infoPiece[contains(span,"Tattoos:")]/$smallInfo
|
||||
HairColor: $infoPiece[contains(span,"Hair Color:")]/$smallInfo
|
||||
CareerLength:
|
||||
selector: $infoPiece[contains(span,"Career Start and End:")]/$smallInfo
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: \s+to\s+
|
||||
with: "-"
|
||||
URL: //link[@rel="canonical"][1]/@href
|
||||
Image: //div[@class="thumbImage"]/img/@src|//img[@id="getAvatar"]/@src
|
||||
sceneScraper:
|
||||
common:
|
||||
$datablob: //script[contains(., 'VideoObject')]/text()
|
||||
$videowrap: //div[@class="video-wrapper"]
|
||||
scene:
|
||||
Title: //meta[@property="og:title"]/@content
|
||||
URL: //meta[@property="og:url"]/@content
|
||||
Date:
|
||||
selector: $datablob
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: .+(?:"uploadDate"\s*:\s*")([^T]+).+
|
||||
with: $1
|
||||
- parseDate: "2006-01-02"
|
||||
Tags:
|
||||
Name: $videowrap//div[contains(concat(" ",normalize-space(@class)," ")," categoriesWrapper ")]/a/text()|$videowrap//div[contains(concat(" ",normalize-space(@class)," ")," tagsWrapper ")]/a/text()
|
||||
Performers:
|
||||
Name: $videowrap//div[contains(concat(" ",normalize-space(@class)," ")," pornstarsWrapper ")]/a/@data-mxptext
|
||||
Image: //meta[@property="og:image"]/@content
|
||||
Studio:
|
||||
Name: $videowrap//div[contains(concat(" ",normalize-space(@class)," ")," usernameWrap ")]//a/text()
|
||||
|
||||
driver:
|
||||
cookies:
|
||||
- CookieURL: "https://www.pornhub.com"
|
||||
Cookies:
|
||||
- Name: "accessAgeDisclaimerPH"
|
||||
Domain: ".pornhub.com"
|
||||
Value: "1"
|
||||
Path: "/"
|
||||
# Last Updated September 26, 2023
|
||||
Reference in New Issue
Block a user