stash
This commit is contained in:
@@ -0,0 +1,70 @@
|
||||
import re
|
||||
import sys
|
||||
import requests
|
||||
import json
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlparse
|
||||
|
||||
try:
|
||||
import py_common.log as log
|
||||
except ModuleNotFoundError:
|
||||
print(
|
||||
"You need to download the folder 'py_common' from the community repo (CommunityScrapers/tree/master/scrapers/py_common)",
|
||||
file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
# lxml is a third-party dependency; explain how to install it when missing.
try:
    from lxml import html
except ModuleNotFoundError:
    print("You need to install the lxml module. (https://lxml.de/installation.html#installation)", file=sys.stderr)
    print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install lxml",
          file=sys.stderr)
    # Exit with a non-zero status, matching the py_common guard, so the
    # missing dependency is reported as a failure rather than a success.
    sys.exit(1)
|
||||
|
||||
# Module-level HTTP session; get_scraped() issues its requests through it.
session = requests.session()
|
||||
|
||||
|
||||
def get_scraped(inp):
    """Fetch the page whose address is stored under ``inp['url']``.

    Args:
        inp: Mapping parsed from the scraper input; only its ``url`` entry
            is read.

    Returns:
        The ``requests`` response on success, or ``None`` when no URL was
        supplied, the request itself failed, or the server answered with a
        status code of 400 or above. Every ``None`` case is logged.
    """
    # .get() so an input without a 'url' key is reported like an empty URL
    # instead of raising KeyError.
    url = inp.get('url')
    if not url:
        log.error('No URL Entered')
        return None

    try:
        # Without a timeout requests can wait indefinitely on a stalled server.
        scraped = session.get(url, timeout=30)
    except requests.RequestException as err:
        # Connection/DNS/timeout problems are logged like any other failure.
        log.error('Request failed: %s' % err)
        return None

    if scraped.status_code >= 400:
        log.error('HTTP Error: %s' % scraped.status_code)
        return None
    log.trace('Scraped the url: ' + url)
    return scraped
|
||||
|
||||
|
||||
def _first_xpath(tree, expression):
    """Return the first result of *expression* on *tree*, stripped, or None."""
    matches = tree.xpath(expression)
    return matches[0].strip() if matches else None


def performer_by_url():
    """Scrape a performer page and return its details as a dict.

    Reads a JSON object from stdin, fetches the page through ``get_scraped``
    and extracts the picture, the name and an approximate birthdate from the
    returned HTML.

    Returns:
        A dict of performer fields, or an empty dict when the page could not
        be fetched. ``Image``, ``Name`` and ``Birthdate`` are omitted when
        the corresponding element is not found on the page.
    """
    # Local import: the file's top-level import only brings in urlparse.
    from urllib.parse import urljoin

    inp = json.loads(sys.stdin.read())
    scraped = get_scraped(inp)
    if not scraped:
        return {}

    tree = html.fromstring(scraped.content)

    image = _first_xpath(tree, '//div[contains(@class, "model_picture")]/img/@src0_3x')
    if image is None:
        log.error('Could not find the performer image')
    else:
        # urljoin resolves root-relative, page-relative and absolute sources
        # against the final (post-redirect) page URL.
        image = urljoin(scraped.url, image)

    name = _first_xpath(tree, '//meta[@name="keywords"]/@content')
    if name is None:
        log.error('Could not find the performer name')
    else:
        name = name.capitalize()

    # The first two-digit number in the bio text is treated as an age
    # (NOTE(review): confirm against the site's bio layout).
    bio = "".join(tree.xpath('//div[@class="model_bio"]/text()'))
    age = re.search(r"[0-9]{2}", bio)
    if age is None:
        log.error('Could not find the performer age')
        birthdate = None
    else:
        # Only the birth year can be derived, so the date is pinned to Jan 1.
        # Building the date directly avoids the ValueError that
        # datetime.now().replace(year=...) raises on Feb 29 when the target
        # year is not a leap year.
        birth_year = datetime.now().year - int(age.group(0))
        birthdate = datetime(birth_year, 1, 1).strftime('%Y-%m-%d')

    performer = {
        "Image": image,
        "Name": name,
        "Disambiguation": "Broken Latina Whores",
        "Gender": "Female",
        "Birthdate": birthdate,
        "Ethnicity": "Latin",
    }
    # Drop the fields that could not be scraped instead of emitting nulls.
    return {key: value for key, value in performer.items() if value is not None}
|
||||
|
||||
|
||||
# Entry point: the first command-line argument selects the scrape action.
# A missing argument is handled explicitly so it produces the same empty
# JSON result as an unknown one instead of an IndexError traceback.
action = sys.argv[1] if len(sys.argv) > 1 else None
if action == "performerByURL":
    print(json.dumps(performer_by_url()))
elif action is None:
    log.error("No argument passed")
    print("{}")
else:
    log.error("Unknown argument passed: " + action)
    print("{}")
|
||||
|
||||
# Last Updated March 16, 2024
|
||||
Reference in New Issue
Block a user