151 lines
5.7 KiB
Python
151 lines
5.7 KiB
Python
import datetime
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
from configparser import ConfigParser, NoSectionError
|
|
from urllib.parse import urlparse
|
|
|
|
# py_common is imported from the parent directory (expected to be the
# scrapers directory), so that directory has to be on the module search path.
_script_path = os.path.realpath(__file__)  # absolute, symlink-free path of this script
csd = os.path.split(_script_path)[0]  # current script directory
parent = os.path.split(csd)[0]  # parent directory of the script directory
sys.path.append(parent)  # appended last, so existing entries keep priority
|
|
|
|
# `log` from py_common provides the log.debug / log.error calls used
# throughout this script; without it the script cannot run.
try:
    from py_common import log
except ModuleNotFoundError:
    print(
        "You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)",
        file=sys.stderr)
    # A missing dependency is a failure: exit with a non-zero status, like
    # every other fatal path in this script (a bare sys.exit() reports 0).
    sys.exit(1)
|
|
|
|
# `requests` performs every HTTP call of this script; it is a third-party
# package, so explain how to install it when it is missing.
try:
    import requests
except ModuleNotFoundError:
    print("You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", file=sys.stderr)
    print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", file=sys.stderr)
    # A missing dependency is a failure: exit with a non-zero status, like
    # every other fatal path in this script (a bare sys.exit() reports 0).
    sys.exit(1)
|
|
|
|
def sendRequest(url, req_headers):
    """Send a GET request and return the response, exiting on transport errors.

    Args:
        url: Address to request.
        req_headers: HTTP headers sent with the request.

    Returns:
        The response object returned by ``requests.get``. The HTTP status
        code is not inspected here; only exceptions raised by ``requests``
        count as failures.

    On a ``requests`` exception the failure is reported through ``log``, the
    requested URL and the exception are written to ``ModelCentroAPI.log``
    (overwriting any previous content), and the process exits with status 1.
    """
    try:
        # timeout is (connect, read) in seconds
        return requests.get(url, headers=req_headers, timeout=(3, 5))
    except requests.exceptions.RequestException as err:
        log.error("An error has occurred with Requests")
        log.error("Check your ModelCentroAPI.log for more details")
        with open("ModelCentroAPI.log", 'w', encoding='utf-8') as log_file:
            # No response exists when the request itself failed, so record
            # what was requested and why it failed.
            log_file.write(f"Request:\n{url}\n\nError:\n{err!r}\n")
        sys.exit(1)
|
|
|
|
|
|
def check_config(time_now):
    """Return the API keys cached for DOMAIN_URL if they are still fresh.

    The keys are read from the DOMAIN_URL section of the SET_FILE_URL ini
    file, together with the timestamp at which they were stored.

    Args:
        time_now: Day of the month of the current date (the caller passes
            ``datetime.datetime.now().day``).

    Returns:
        ``(keys1, keys2)`` when the stored timestamp falls on day
        ``time_now`` and is less than one day away from the current time;
        ``(None, None)`` when the file, the section, an option or a usable
        timestamp is missing, or when the keys are too old.
    """
    # NoOptionError is not part of the file-level configparser import.
    from configparser import NoOptionError

    if not os.path.isfile(SET_FILE_URL):
        return None, None

    config = ConfigParser()
    config.read(SET_FILE_URL)
    try:
        ini_keys1 = config.get(DOMAIN_URL, 'keys1')
        ini_keys2 = config.get(DOMAIN_URL, 'keys2')
        ini_date = config.get(DOMAIN_URL, 'date')
    except (NoSectionError, NoOptionError):
        # Nothing (or only an incomplete entry) is cached for this domain.
        return None, None

    # str(datetime) omits the fractional part when microseconds are zero,
    # so accept the timestamp with and without it.
    time_past = None
    for date_format in ('%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S'):
        try:
            time_past = datetime.datetime.strptime(ini_date, date_format)
            break
        except ValueError:
            continue

    if time_past is not None:
        # Key for 1 days. The day-of-month alone also matches the same day
        # number of another month, so the age of the entry is checked too.
        age = abs(datetime.datetime.now() - time_past)
        if time_past.day == time_now and age < datetime.timedelta(days=1):
            log.debug("Using old API keys")
            return ini_keys1, ini_keys2

    log.debug("Need new API keys")
    return None, None
|
|
|
|
|
|
def write_config(keys1, keys2):
    """Store the API keys for DOMAIN_URL together with the current time.

    The existing content of the SET_FILE_URL ini file is loaded first, so
    the sections of other domains are preserved when the file is rewritten.

    Args:
        keys1: First API key (string), stored as option ``keys1``.
        keys2: Second API key (string), stored as option ``keys2``.
    """
    config = ConfigParser()
    config.read(SET_FILE_URL)
    # Probing with config.get(..., 'date') raised NoOptionError for a section
    # that exists but has no date; asking for the section itself is enough.
    if not config.has_section(DOMAIN_URL):
        config.add_section(DOMAIN_URL)
    config.set(DOMAIN_URL, 'keys1', keys1)
    config.set(DOMAIN_URL, 'keys2', keys2)
    # Always write the microseconds: str(datetime) drops them when they are
    # zero, and the stored value is parsed back with a '%f' format.
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
    config.set(DOMAIN_URL, 'date', timestamp)
    with open(SET_FILE_URL, 'w', encoding='utf-8') as configfile:
        config.write(configfile)
|
|
|
|
# Fixed settings: the User-Agent sent with every request and the ini file
# in which the API keys are cached.
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0'
SET_FILE_URL = "ModelCentroAPI.ini"

# The scrape request arrives as a JSON document on stdin; its "url" entry is
# the scene page, and the host part of that URL identifies the site.
FRAGMENT = json.load(sys.stdin)
SCENE_URL = FRAGMENT["url"]
DOMAIN_URL = urlparse(SCENE_URL).netloc
|
|
|
|
# The scene id is the first run of digits that directly follows a "/" in the
# URL. Test the match object before reading the group: calling .group() on a
# failed search raises AttributeError instead of reaching the error message.
scene_id_match = re.search(r"/(\d+)/*", SCENE_URL)
if scene_id_match is None:
    log.error(f"Error with the ID ({SCENE_URL})\nAre you sure that your URL is correct ?")
    sys.exit(1)
scene_id = scene_id_match.group(1)
|
|
|
|
# Use the cached API keys when check_config still accepts them; otherwise
# read a new pair from the scene page and cache it.
timenow = datetime.datetime.now()
api_key1, api_key2 = check_config(timenow.day)
if api_key1 is None:
    log.debug("Going to the URL...")
    url_headers = {'User-Agent': USER_AGENT}
    r = sendRequest(SCENE_URL, url_headers)
    page_html = r.text
    try:
        # Greedy match with DOTALL: everything from "_fox_init" up to the
        # last "</script>" of the page.
        api_function = re.findall(
            r'_fox_init(.+)</script>', page_html, re.DOTALL | re.MULTILINE)[0]
        reversed_key1 = re.findall(
            r'ah":"([a-zA-Z0-9_-]+)"', api_function, re.MULTILINE)[0]
        api_key2 = re.findall(r'aet":(\d+),"', api_function, re.MULTILINE)[0]
        # Need to reverse this key
        api_key1 = reversed_key1[::-1]
        write_config(api_key1, api_key2)
    except IndexError:
        # One of the patterns found nothing in the page.
        log.error("There is a problem with getting API identification")
        sys.exit(1)
|
|
|
|
# Ask the site API for the scene itself; the request carries both API keys
# in its path and filters the content by the scene id.
log.debug("Asking the Scene API...")
api_url = (
    f"https://{DOMAIN_URL}/sapi/{api_key1}/{api_key2}/content.load"
    "?_method=content.load&tz=1"
    "&filter[id][fields][0]=id"
    f"&filter[id][values][0]={scene_id}"
    "&transitParameters[v1]=ykYa8ALmUD"
    "&transitParameters[preset]=scene"
)
headers = {
    'User-Agent': USER_AGENT,
    'Referer': SCENE_URL,
}
r = sendRequest(api_url, headers)
try:
    # The scene is the first entry of response -> collection.
    scene_api_json = r.json()['response']['collection'][0]
except (ValueError, LookupError, TypeError):
    # ValueError: the body is not JSON; LookupError/TypeError: the JSON does
    # not have the expected shape. Anything else (e.g. KeyboardInterrupt)
    # is deliberately not swallowed.
    log.error("Error with Request API")
    sys.exit(1)
|
|
|
|
# Ask the site API for the performers of the scene. A failure here is not
# fatal: it is logged and the scrape continues with the names found so far.
log.debug("Trying the Performer API...")
perf_list = []
api_url = (
    f"https://{DOMAIN_URL}/sapi/{api_key1}/{api_key2}/model.getModelContent"
    "?_method=model.getModelContent&tz=1"
    "&fields[0]=modelId.stageName"
    f"&transitParameters[contentId]={scene_id}"
)
r = sendRequest(api_url, headers)
try:
    performer_api_json = r.json()['response']['collection']
    # Iterate by key and index back into the container (instead of calling
    # .values()) so that an empty, list-shaped collection simply yields no
    # performer.
    for perf_id in performer_api_json:
        models = performer_api_json[perf_id]['modelId']['collection']
        for perf_id2 in models:
            performer_name = models[perf_id2]['stageName']
            perf_list.append({"name": performer_name})
except (ValueError, LookupError, TypeError):
    # ValueError: the body is not JSON; LookupError/TypeError: the JSON does
    # not have the expected shape. Anything else (e.g. KeyboardInterrupt)
    # is deliberately not swallowed.
    log.error("Performer API failed")
|
|
# Time to scrape all data
scrape = {}
scrape['title'] = scene_api_json.get('title')

# The publish date is looked up with .get(), so it may be absent; a missing
# or malformed value only drops the date instead of aborting the scrape.
publish_date = scene_api_json['sites']['collection'][scene_id].get('publishDate')
try:
    date = datetime.datetime.strptime(publish_date, '%Y-%m-%d %H:%M:%S')
except (TypeError, ValueError):
    log.debug(f"No usable publish date in the API answer ({publish_date!r})")
else:
    scrape['date'] = str(date.date())

scrape['details'] = scene_api_json.get('description')

# Studio name = host without a leading "www." and a trailing ".com". The
# pattern is anchored so the same text inside the name is left alone
# (unanchored, "sub.comics.com" would become "subics").
scrape['studio'] = {}
scrape['studio']['name'] = re.sub(r'^www\.|\.com$', '', DOMAIN_URL)

if perf_list:
    scrape['performers'] = perf_list

# Iterate by key and index back into the container so that an empty,
# list-shaped collection simply yields no tag.
tag_collection = scene_api_json['tags']['collection']
scrape['tags'] = [{"name": tag_collection[x].get('alias')} for x in tag_collection]
scrape['image'] = scene_api_json['_resources']['primary'][0]['url']

for key_name, key_value in scrape.items():
    log.debug(f'[{key_name}]:{key_value}')

# The scraped result is emitted as a single JSON document on stdout.
print(json.dumps(scrape))
|