Files
compose-projects-arr/stash/config/scrapers/community/PerfectGonzo/PerfectGonzo.py
Christoph Califice 0a5f88d75a stash
2025-10-10 09:50:30 -03:00

102 lines
3.8 KiB
Python

import base64
import datetime
import json
import re
import string
import sys
from urllib.parse import urlparse
# extra modules below need to be installed
try:
import cloudscraper
except ModuleNotFoundError:
print("You need to install the cloudscraper module. (https://pypi.org/project/cloudscraper/)", file=sys.stderr)
print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install cloudscraper", file=sys.stderr)
sys.exit()
try:
from lxml import html, etree
except ModuleNotFoundError:
print("You need to install the lxml module. (https://lxml.de/installation.html#installation)", file=sys.stderr)
print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install lxml", file=sys.stderr)
sys.exit()
# Maps the favicon URL found in a scene page's <link> tag to the studio name
# reported in the scrape result.
STUDIO_MAP = {
    "https://static-cdn-perfectgonzo.explicithd.com/assets/img/favicon_perfectgonzo.com.ico":
        "Perfect Gonzo",
    "https://static-cdn-perfectgonzo.explicithd.com/assets/img/favicon_allinternal.com.ico":
        "All Internal",
    "https://static-cdn-perfectgonzo.explicithd.com/assets/img/favicon_asstraffic.com.ico":
        "Ass Traffic",
    "https://static-cdn-perfectgonzo.explicithd.com/assets/img/favicon_cumforcover.com.ico":
        "Cum For Cover",
    # The only entry served from the sapphix CDN and as a PNG rather than ICO.
    "https://static-cdn-sapphix.explicithd.com/assets/img/favicon_fistflush.com.png":
        "Fist Flush",
    "https://static-cdn-perfectgonzo.explicithd.com/assets/img/favicon_milfthing.com.ico":
        "Milf Thing",
    "https://static-cdn-perfectgonzo.explicithd.com/assets/img/favicon_primecups.com.ico":
        "Prime Cups",
    "https://static-cdn-perfectgonzo.explicithd.com/assets/img/favicon_purepov.com.ico":
        "Pure POV",
    "https://static-cdn-perfectgonzo.explicithd.com/assets/img/favicon_spermswap.com.ico":
        "Sperm Swap",
    "https://static-cdn-perfectgonzo.explicithd.com/assets/img/favicon_tamedteens.com.ico":
        "Tamed Teens",
}
def log(*s):
    """Report a fatal scrape problem and terminate the script.

    The values in ``s`` are printed to stderr (space-separated, as ``print``
    does), an empty JSON object is written to stdout, and the process exits
    with status 1.

    Despite its name this function never returns: it always raises
    ``SystemExit``, so code following a call to it is unreachable.
    """
    print(*s, file=sys.stderr)
    # Presumably emitted so whatever consumes stdout still gets parseable JSON.
    print(json.dumps({}))
    sys.exit(1)
def _first(nodes, what):
    """Return the first XPath match in ``nodes``.

    When nothing matched, abort through ``log`` (which exits the process)
    with a message naming ``what`` instead of crashing with an IndexError.
    """
    if not nodes:
        log(f'Could not find {what} on the page.')
    return nodes[0]


# The scrape request arrives as a JSON object on stdin; only 'url' is used.
frag = json.loads(sys.stdin.read())
# .get() so a missing key reaches the friendly error instead of a KeyError.
url = frag.get('url')
if not url:
    log('No URL entered.')

scraper = cloudscraper.create_scraper()
try:
    scraped = scraper.get(url)
except Exception as err:
    # Not a bare ``except:`` -- KeyboardInterrupt/SystemExit must propagate.
    log("scrape error:", err)
if scraped.status_code >= 400:
    log('HTTP Error: %s' % scraped.status_code)

tree = html.fromstring(scraped.content)

title = _first(tree.xpath('//div[@class="row"]//h2/text()'), 'title').strip()

# The date text carries an "Added" prefix, which is removed before parsing.
date = _first(tree.xpath('//div[@class="row"]//span/text()'), 'date')
date = re.sub(r"Added\s*", "", date).strip()
try:
    iso_date = datetime.datetime.strptime(date, "%B %d, %Y").strftime("%Y-%m-%d")
except ValueError:
    log(f'Unrecognised date format: {date!r}')

details = _first(tree.xpath('//p[@class="mg-md"]'), 'details')

# Cover image: the video poster if present, otherwise the hero image.
imgurl = _first(
    tree.xpath('//video[@id="video"]/@poster | //div[@id="video-hero"]//img/@src'),
    'cover image',
)
img = scraper.get(imgurl).content
b64img = base64.b64encode(img)
datauri = "data:image/jpeg;base64,"

# The studio is identified by the page's favicon URL.
favicon = _first(
    tree.xpath('//link[@type="image/ico"]/@href | //link[@type="image/png"]/@href'),
    'studio favicon',
)
studio = STUDIO_MAP.get(favicon)
if studio is None:
    log(f'Unknown studio favicon: {favicon}')

performers = tree.xpath('//div[contains(h4,"Featured model")]//a/text()')

tags = []
if studio == 'Fist Flush':
    # On this studio's pages the tag name is taken from the link's query string.
    tags = [
        x.replace('/movies?tag[]=', '').replace('&nats=', '')
        for x in tree.xpath('//div[contains(h4,"Tags:")]//a/@href')
    ]
else:
    # The tag container interleaves bare text nodes (category headings) with
    # element nodes (the tags themselves); each tag is prefixed with the most
    # recent heading seen.
    tag_nodes = tree.xpath("//div[contains(@class, 'tag-container')]/node()")
    tag_category = ''
    for node in tag_nodes:
        # lxml returns XPath text nodes as ``str`` subclasses.
        if isinstance(node, str):
            if node.strip():
                tag_category = node.strip()
            continue
        tag_name = node.text_content().strip()
        if tag_name == 'Tags:':
            continue
        tags.append(f'{tag_category} - {tag_name}')

ret = {
    'title': title,
    'tags': [{'name': x} for x in tags],
    'date': iso_date,
    'details': details.text_content().strip(),
    'image': datauri + b64img.decode('utf-8'),
    'studio': {
        'name': studio
    },
    'performers': [{'name': x.strip()} for x in performers],
}
# The scrape result is delivered as a single JSON document on stdout.
print(json.dumps(ret))