Files
compose-projects-arr/stash/config/scrapers/community/BrokenLatinaWhores/BrokenLatinaWhores.py
Christoph Califice 0a5f88d75a stash
2025-10-10 09:50:30 -03:00

70 lines
2.1 KiB
Python

import re
import sys
import requests
import json
from datetime import datetime
from urllib.parse import urlparse
# py_common comes from the CommunityScrapers repository (see the message
# below) and supplies the `log` helpers used throughout this script.
try:
    import py_common.log as log
except ModuleNotFoundError:
    print(
        "You need to download the folder 'py_common' from the community repo (CommunityScrapers/tree/master/scrapers/py_common)",
        file=sys.stderr)
    sys.exit(1)

# lxml is a third-party dependency used to parse the downloaded HTML.
try:
    from lxml import html
except ModuleNotFoundError:
    print("You need to install the lxml module. (https://lxml.de/installation.html#installation)", file=sys.stderr)
    print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install lxml",
          file=sys.stderr)
    # A missing dependency is a failure, so exit with a non-zero status
    # (a bare sys.exit() would report success to the caller).
    sys.exit(1)

# Single HTTP session shared by every request this script makes.
session = requests.session()
def get_scraped(inp):
    """Download the page named by the ``url`` entry of *inp*.

    Args:
        inp: Mapping decoded from the scraper's JSON input; its ``url``
            entry is the address to fetch.

    Returns:
        The response object on success, or ``None`` when no URL was
        supplied, the request itself failed, or the server answered with
        an HTTP status of 400 or above. Every failure is logged.
    """
    # .get() so that a missing key is reported the same way as an empty URL
    # instead of raising KeyError.
    url = inp.get('url')
    if not url:
        log.error('No URL Entered')
        return None
    try:
        # Without a timeout a stalled server would hang the scraper forever.
        scraped = session.get(url, timeout=30)
    except requests.RequestException as err:
        log.error('Request failed: %s' % err)
        return None
    if scraped.status_code >= 400:
        log.error('HTTP Error: %s' % scraped.status_code)
        return None
    log.trace('Scraped the url: ' + url)
    return scraped
def performer_by_url():
    """Scrape a performer page and return its details as a dict.

    Reads a JSON object from stdin, fetches the page given by its ``url``
    entry via ``get_scraped`` and extracts the performer fields from the
    HTML.

    Returns:
        An empty dict when the page could not be fetched. Otherwise a dict
        that always holds the fixed ``Disambiguation``, ``Gender`` and
        ``Ethnicity`` entries, plus ``Image``, ``Name`` and ``Birthdate``
        for each of those that could be found on the page. A field that
        cannot be found is logged and left out rather than aborting the
        whole scrape.
    """
    # Local import: the file's top-level import only brings in urlparse.
    from urllib.parse import urljoin

    inp = json.loads(sys.stdin.read())
    scraped = get_scraped(inp)
    if not scraped:
        return {}
    tree = html.fromstring(scraped.content)

    performer = {
        "Disambiguation": "Broken Latina Whores",
        "Gender": "Female",
        "Ethnicity": "Latin",
    }

    images = tree.xpath('//div[contains(@class, "model_picture")]/img/@src0_3x')
    image = images[0].strip() if images else ""
    if image:
        # urljoin resolves root-relative, page-relative and absolute values
        # against the final page URL; blindly dropping the first character
        # only worked for paths that start with "/".
        performer["Image"] = urljoin(scraped.url, image)
    else:
        log.error('No performer image found on the page')

    names = tree.xpath('//meta[@name="keywords"]/@content')
    name = names[0].strip() if names else ""
    if name:
        performer["Name"] = name.capitalize()
    else:
        log.error('No performer name found on the page')

    # The first two-digit number in the bio text is used as an age in years
    # (presumably the performer's stated age - verify against the site).
    bio_text = "".join(tree.xpath('//div[@class="model_bio"]/text()'))
    age_match = re.search(r"[0-9]{2}", bio_text)
    if age_match:
        # Only the year can be derived from an age, so the date is pinned to
        # January 1st. Building it from the year alone avoids the ValueError
        # that datetime.replace(year=...) raises when run on February 29th
        # and the target year is not a leap year.
        birth_year = datetime.now().year - int(age_match.group(0))
        performer["Birthdate"] = f"{birth_year:04d}-01-01"
    else:
        log.error('No age found in the performer bio')

    return performer
# The first command-line argument selects the scrape action; the result is
# written to stdout as JSON. An empty JSON object is printed for anything
# that cannot be handled.
action = sys.argv[1] if len(sys.argv) > 1 else None
if action == "performerByURL":
    print(json.dumps(performer_by_url()))
elif action is None:
    # Running the script with no argument used to die with IndexError.
    log.error("No argument passed")
    print("{}")
else:
    log.error("Unknown argument passed: " + action)
    print("{}")
# Last Updated March 16, 2024