124 lines
3.8 KiB
Python
124 lines
3.8 KiB
Python
import json
|
|
import sys
|
|
|
|
try:
|
|
import requests
|
|
except ModuleNotFoundError:
|
|
print("You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", file=sys.stderr)
|
|
print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", file=sys.stderr)
|
|
sys.exit()
|
|
|
|
try:
|
|
from bs4 import BeautifulSoup
|
|
except ModuleNotFoundError:
|
|
print("You need to install the BeautifulSoup module. (https://pypi.org/project/beautifulsoup4/)", file=sys.stderr)
|
|
print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install beautifulsoup4", file=sys.stderr)
|
|
sys.exit()
|
|
|
|
# Print debug message.
|
|
PRINT_DEBUG = True
|
|
|
|
|
|
def debug(q):
|
|
q = str(q)
|
|
if "[DEBUG]" in q and PRINT_DEBUG == False:
|
|
return
|
|
print(q, file=sys.stderr)
|
|
|
|
ret = {}
|
|
performers = []
|
|
|
|
|
|
def scrape_scene_img_and_tags(url):
|
|
# Redirect to https://premiumbukkake.com/
|
|
url = url.replace("free.", "")
|
|
url = url.replace(".com/", ".com/tour2/")
|
|
|
|
r = requests.get(url)
|
|
|
|
soup = BeautifulSoup(r.text, 'html.parser')
|
|
html_text = soup.find('div', attrs={'class': 'section tour'})
|
|
|
|
# Get Image
|
|
html_img = html_text.find('div', attrs={'class': 'slide_avatar'})
|
|
scene_img = html_img.find('img').attrs['data-src']
|
|
ret['image'] = f"https://premiumbukkake.com{scene_img}"
|
|
|
|
html_info = html_text.find_all('div', attrs={'class': 'slide_info_row'})
|
|
|
|
# Get Tags
|
|
scene_tags = html_info[2].find_all('a')
|
|
|
|
tags = []
|
|
for tag in scene_tags:
|
|
tags.append({'name': tag.text.strip()})
|
|
ret['tags'] = tags
|
|
|
|
# Get Performers
|
|
scene_performers = html_info[1].find_all('a')
|
|
|
|
for performer in scene_performers:
|
|
performers.append(performer.text.strip())
|
|
|
|
|
|
def scrape_performer(name):
|
|
modified_name = name.replace(' ', '-')
|
|
performer_url = f"https://premiumbukkake.com/tour2/models/{modified_name}.html"
|
|
r = requests.get(performer_url)
|
|
soup = BeautifulSoup(r.text, 'html.parser')
|
|
html_content = soup.find('div', attrs={'class': 'block-bio-content'})
|
|
html_stats = soup.find('div', attrs={'class': 'block-bio-stats'}).find_all('dd')
|
|
html_bio = soup.find('div', attrs={'class': 'block-bio-text'}).find('p').getText()
|
|
html_img = soup.find('div', attrs={'class': 'block-bio-img'}).find('img').attrs['data-src']
|
|
|
|
perfomer_details = {'name': name, 'gender': 'Female', 'height': html_stats[1].getText(),
|
|
'measurements': html_stats[2].getText(), 'url': performer_url,
|
|
'image': f"http{html_img}", 'details': html_bio}
|
|
|
|
return perfomer_details
|
|
|
|
|
|
def scrape_scene_url(url):
|
|
r = requests.get(url)
|
|
|
|
soup = BeautifulSoup(r.text, 'html.parser')
|
|
script_text = soup.find('script', attrs={'type': 'application/ld+json'}).string.replace("\n", '')
|
|
json_script = json.loads(script_text)
|
|
|
|
# Add Scene Details
|
|
ret['title'] = json_script['name']
|
|
ret['details'] = json_script['description']
|
|
ret['date'] = json_script['uploadDate']
|
|
|
|
scrape_scene_img_and_tags(url)
|
|
|
|
# Add Studio Details
|
|
ret['studio'] = {}
|
|
ret['studio']['name'] = 'Premium Bukkake'
|
|
|
|
# Add Performer Details
|
|
ret['performers'] = {}
|
|
|
|
perf = []
|
|
for perf_name in performers:
|
|
try:
|
|
perf.append(scrape_performer(perf_name.strip()))
|
|
except Exception:
|
|
debug("[DEBUG] UNABLE TO SCRAPE PERFROMER")
|
|
pass
|
|
|
|
ret['performers'] = perf
|
|
return ret
|
|
|
|
|
|
FRAGMENT = json.loads(sys.stdin.read())
|
|
SCENE_URL = FRAGMENT.get("url")
|
|
|
|
if SCENE_URL:
|
|
debug(f"[DEBUG] Url Scraping: {SCENE_URL}")
|
|
result = scrape_scene_url(SCENE_URL)
|
|
print(json.dumps(result))
|
|
else:
|
|
debug("[DEBUG] Nothing Provided To Scrape")
|
|
# Last Updated October 11, 2021
|