137 lines
4.2 KiB
YAML
137 lines
4.2 KiB
YAML
# Scraper identity and the entry points it exposes. Every entry point uses the
# scrapeXPath action and names one of the scrapers defined under xPathScrapers.
name: "Bang"

# Scene pages, matched on these URL fragments.
sceneByURL:
  - action: scrapeXPath
    url: ["bang.com/video", "bangpremium.com/video"]
    scraper: sceneScraper

# Free-text scene search; results are read by the sceneSearch scraper.
# NOTE(review): `{}` is presumably the placeholder for the search text - confirm
# against the scraper format documentation.
sceneByName:
  action: scrapeXPath
  queryURL: "https://www.bang.com/videos?term={}"
  scraper: sceneSearch

# Follow-up fetch for a selected result: the queryURL is the fragment's own URL.
sceneByQueryFragment:
  action: scrapeXPath
  queryURL: '{url}'
  scraper: sceneScraper

# Movie (DVD) pages, matched on these URL fragments.
movieByURL:
  - action: scrapeXPath
    url: ["bang.com/dvd", "bangpremium.com/dvd"]
    scraper: movieScraper

# Performer pages, matched on these URL fragments.
performerByURL:
  - action: scrapeXPath
    url: ["bang.com/pornstar", "bangpremium.com/pornstar"]
    scraper: performerScraper
xPathScrapers:
  # Reads the search-results listing: every field is selected relative to the
  # result containers captured in $scenes.
  sceneSearch:
    common:
      $scenes: //div[contains(@class, "video_container")]
    scene:
      Title: $scenes//a/span
      Date:
        # The date is taken from the text node(s) following the "•" separator span.
        selector: $scenes//span[contains(., "•")]/following-sibling::text()
        postProcess:
          - parseDate: Jan 02, 2006
      Image: $scenes//img/@src
      URL:
        selector: $scenes//a[contains(@href, "/video/")]/@href
        # Prefixes the selected href with the site host. Anchored so the same
        # post-processing can be reused by the aliases in sceneScraper below.
        postProcess: &prependHost
          - replace:
              - regex: ^
                with: https://www.bang.com

  # Reads a single scene page.
  sceneScraper:
    common:
      # $movie resolves to the <span> holding the movie title, nested inside the
      # link to the movie's /dvd/ page.
      $movie: //div[@data-controller="video-entry"]//a[contains(@href,"/dvd/")]//div[contains(@class,"name")]/span[1]
      $performer: //p[contains(@class,"capitalize") and contains(text(),"With:")]/a[contains(@href,"/pornstar/")]
    scene:
      Title: //meta[@property="og:title"]/@content
      Details: //meta[@name="description"]/@content
      Image:
        selector: //meta[@property="og:image"]/@content
        postProcess:
          # Keep everything before the query string.
          - replace:
              - regex: (.+)(\?.+)
                with: $1
      Date:
        selector: //p[contains(text(),"Date:")]/text()
        postProcess:
          # "<label>: Mon DD, YYYY ..." -> "Mon DD YYYY" so it matches the layout below.
          - replace:
              - regex: \w+:\s*(\w+\s)(\d+),(\s\d{4}).*
                with: $1$2$3
          - parseDate: Jan 02 2006
      Tags:
        Name:
          selector: //div[@class="actions"]/a
      Performers:
        Name: $performer
        URL:
          selector: $performer/@href
          postProcess: *prependHost
      Studio:
        Name: //p[contains(text(),"Studio:")]//a[contains(@href,"from=")]/img/@alt
      Movies:
        Name: $movie
        URL:
          # $movie is a <span>, which carries no href of its own; step back up to
          # the enclosing /dvd/ link to read the movie URL.
          selector: $movie/ancestor::a[contains(@href,"/dvd/")]/@href
          postProcess: *prependHost

  # Reads a movie (DVD) page.
  movieScraper:
    common:
      $details: //div[@class="w-full"][1]
      $image: //body/div[contains(@class,"w-full")][3]/div/div/picture//img[contains(@src,"/front")]/@src
    movie:
      Name:
        selector: $details/h1
      Duration:
        selector: //p[contains(text(),"Date:")]/span[2]/text()
      Date:
        selector: //p[contains(text(),"Date:")]/span[1]/text()
        postProcess:
          - parseDate: Jan 02, 2006
      Synopsis: $details//p[contains(@class,"clear-both")]
      Studio:
        Name: $details//p[contains(text(),"Studio")]/a/text()
      FrontImage:
        selector: $image
        postProcess:
          # Drop the query string from the cover URL.
          - replace:
              - regex: \?.*$
                with: ""
      BackImage:
        # Derived from the front cover URL: swap the file name and drop anything
        # after it (query string included).
        selector: $image
        postProcess:
          - replace:
              - regex: /front\.jpg.*$
                with: "/back.jpg"

  # Reads a performer page; most fields live inside the $overlay container.
  performerScraper:
    common:
      $overlay: //div[@class="flex flex-col md:items-start items-center"]
    performer:
      Name: $overlay/h2
      URL:
        selector: //link[@rel="canonical"][1]/@href
      Birthdate:
        selector: $overlay//div[contains(text(),"Born")]/span[contains(text(),"old")]
        postProcess:
          # "Month D, YYYY ..." -> "Month D YYYY" so it matches the layout below.
          - replace:
              - regex: \s*(\w+\s)(\d+),(\s\d{4}).*
                with: $1$2$3
          - parseDate: January 2 2006
      HairColor:
        selector: $overlay//div[contains(@class,"md:text-left")]/text()[contains(.,"Hair Color")]/following-sibling::span[1]
      Ethnicity:
        selector: $overlay//div[contains(@class,"md:text-left")]/text()[contains(.,"Ethnicity")]/following-sibling::span[1]
      EyeColor:
        selector: $overlay//div[contains(@class,"md:text-left")]/text()[contains(.,"Eye Color")]/following-sibling::span[1]
      Aliases:
        # NOTE(review): Name reads $overlay/h2 while this selector anchors on an
        # h1 - confirm against the live page which heading the overlay uses.
        selector: $overlay//h1/following-sibling::div
      Image:
        selector: //div[@class="relative"]//img/@src
        postProcess:
          # Drop the query string from the image URL.
          - replace:
              - regex: \?.+$
                with: ""
# Last Updated March 08, 2024
|