stash
This commit is contained in:
10
stash/config/scrapers/community/multiscrape/manifest
Executable file
10
stash/config/scrapers/community/multiscrape/manifest
Executable file
@@ -0,0 +1,10 @@
|
||||
# Package manifest for the "multiscrape" community scraper, written by
# stash's scraper package manager when the package is installed.
id: multiscrape
name: multiscrape
metadata: {}
# version is a short commit hash from the source repository.
version: fbd81c5
date: "2023-11-22 00:31:17"
# No other scraper packages are required.
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
# Files installed by this package.
files:
- multiscrape.py
- multiscrape.yml
|
||||
258
stash/config/scrapers/community/multiscrape/multiscrape.py
Normal file
258
stash/config/scrapers/community/multiscrape/multiscrape.py
Normal file
@@ -0,0 +1,258 @@
|
||||
|
||||
import json
|
||||
import sys
|
||||
|
||||
try:
|
||||
import requests
|
||||
except ModuleNotFoundError:
|
||||
print("You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", file=sys.stderr)
|
||||
print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", file=sys.stderr)
|
||||
sys.exit()
|
||||
|
||||
class multiscrape:
    """Meta-scraper for stash performers.

    Queries several community performer scrapers through stash's own GraphQL
    API and merges the results field-by-field, taking each field from the
    first scraper in the configured priority list that returns it.
    """

    # GraphQL endpoint of the local stash instance.
    url = "http://localhost:9999/graphql"
    headers = {
        "Accept-Encoding": "gzip, deflate, br",
        "Content-Type": "application/json",
        "Accept": "application/json",
        "Connection": "keep-alive",
        "DNT": "1"
    }

    '''
    update the below config in the preferred order for each field.
    If there are no results for that performer and field it will use the results of the next scraper in the list and cache the results.'''
    config = {
        "gender": ['stash-sqlite'],
        "url": ['Babepedia', 'stash-sqlite', 'FreeonesCommunity', 'Brazzers', 'Pornhub'],
        "twitter": ['Babepedia', 'stash-sqlite'],
        "instagram": ['Babepedia'],
        # NOTE(review): 'IMBD' looks like a typo for 'IMDB' — confirm the
        # actual scraper id before changing; kept as-is to preserve behavior.
        "birthdate": ['IMBD', 'FreeonesCommunity', 'Babepedia', 'stash-sqlite'],
        "ethnicity": ['Babepedia', 'stash-sqlite'],
        "country": ['Babepedia', 'stash-sqlite'],
        "eye_color": ['Babepedia', 'stash-sqlite'],
        "height": ['Babepedia', 'Pornhub', 'stash-sqlite'],
        "measurements": ['Babepedia', 'Pornhub', 'FreeonesCommunity', 'stash-sqlite'],
        "fake_tits": ['Babepedia', 'stash-sqlite'],
        "career_length": ['Pornhub', 'Babepedia', 'stash-sqlite'],
        "tattoos": ['Babepedia', 'stash-sqlite'],
        "piercings": ['Babepedia', 'stash-sqlite'],
        "aliases": ['Babepedia', 'stash-sqlite'],
        "tags": ['Babepedia'],
        "details": ['FreeonesCommunity', 'Babepedia', 'Brazzers'],
        "death_date": ['Babepedia'],
        "hair_color": ['Babepedia'],
        "weight": ['Babepedia', 'FreeonesCommunity'],
        "image": ['performer-image-dir', 'Babepedia', 'FreeonesCommunity']
    }

    def __log(self, levelChar, s):
        """Write one log line to stderr for stash to pick up.

        BUG FIX: the original called self.__prefix(), which was never defined
        anywhere in the class (AttributeError on first use), and passed bytes
        level chars (b't') into str concatenation (TypeError).  The prefix is
        implemented inline here.
        NOTE(review): the \\x01<level>\\x02 wrapper is assumed to match stash's
        log-line protocol — confirm against stash's py_plugins log helper.
        """
        if levelChar == "":
            return

        print("\x01" + levelChar + "\x02" + s + "\n", file=sys.stderr, flush=True)

    def trace(self, s):
        self.__log('t', s)

    def debug(self, s):
        self.__log('d', s)

    def info(self, s):
        self.__log('i', s)

    def warning(self, s):
        self.__log('w', s)

    def error(self, s):
        self.__log('e', s)

    def __callGraphQL(self, query, variables=None):
        """POST `query` (with optional `variables`) to stash's GraphQL API.

        Returns the "data" payload dict, or None when the response is 200 but
        carries no data.  Raises Exception on a GraphQL-level error or a
        non-200 HTTP status.
        """
        # Renamed the local from `json` to `payload`: the original shadowed
        # the imported json module inside this method.
        payload = {'query': query}
        if variables is not None:
            payload['variables'] = variables

        # handle cookies
        response = requests.post(self.url, json=payload, headers=self.headers)

        if response.status_code == 200:
            result = response.json()
            if result.get("error", None):
                for error in result["error"]["errors"]:
                    raise Exception("GraphQL error: {}".format(error))
            if result.get("data", None):
                return result.get("data")
        else:
            raise Exception(
                "GraphQL query failed:{} - {}. Query: {}. Variables: {}".format(response.status_code, response.content, query, variables))

    def list_scrapers(self, type):
        """Return the ids of installed performer scrapers supporting `type`.

        (`type` shadows the builtin, but the parameter name is kept for
        interface compatibility.)
        """
        query = """query listPerformerScrapers {
            listPerformerScrapers {
                id
                name
                performer {
                    supported_scrapes
                }
            }
        }"""
        ret = []
        result = self.__callGraphQL(query)
        # BUG FIX: the original read result["listSceneScrapers"] and
        # r["scene"], but the query above requests listPerformerScrapers /
        # performer — the old keys raised KeyError at runtime.
        for r in result["listPerformerScrapers"]:
            if type in r["performer"]["supported_scrapes"]:
                ret.append(r["id"])
        return ret

    def scrape_performer_list(self, scraper_id, performer):
        """Run one scraper's name search; return its candidate list or None."""
        query = """query scrapePerformerList($scraper_id: ID!, $performer: String!) {
            scrapePerformerList(scraper_id: $scraper_id, query: $performer) {
                name
                url
                gender
                twitter
                instagram
                birthdate
                ethnicity
                country
                eye_color
                height
                measurements
                fake_tits
                career_length
                tattoos
                piercings
                aliases
                image
            }
        }"""

        variables = {'scraper_id': scraper_id, 'performer': performer}
        result = self.__callGraphQL(query, variables)
        if result is not None:
            return result["scrapePerformerList"]
        return None

    def scrape_performer(self, scraper_id, performer):
        """Run one scraper's full performer scrape for a chosen candidate.

        `performer` is a ScrapedPerformerInput-shaped dict (name/url).
        """
        query = """query scrapePerformer($scraper_id: ID!, $performer: ScrapedPerformerInput!) {
            scrapePerformer(scraper_id: $scraper_id, scraped_performer: $performer) {
                name
                url
                gender
                twitter
                instagram
                birthdate
                ethnicity
                country
                eye_color
                height
                measurements
                fake_tits
                career_length
                tattoos
                piercings
                aliases
                image
            }
        }"""
        variables = {'scraper_id': scraper_id, 'performer': performer}
        result = self.__callGraphQL(query, variables)
        return result["scrapePerformer"]

    def requred_scrapers(self):
        """Return every scraper id referenced in `config`, de-duplicated, in
        first-appearance order.

        (The name's typo — "requred" — is kept: the script's "test" CLI mode
        and any external callers use this spelling.)
        """
        scrapers = []
        for key in self.config.keys():
            for s in self.config.get(key):
                if s not in scrapers:
                    scrapers.append(s)
        return scrapers

    def query_performers(self, name):
        """Search every required scraper for `name`.

        Returns the union of all candidates, de-duplicated by exact name
        (first scraper to return a given name wins).
        """
        ret = []

        for scraper in self.requred_scrapers():
            print("Querying performers " + scraper, file=sys.stderr)
            tmp = self.scrape_performer_list(scraper, name)
            if tmp is not None:
                for s in tmp:
                    found = False
                    for t in ret:
                        if s["name"] == t["name"]:
                            found = True
                    if not found:
                        ret.append(s)
        return ret

    def fetch_performer(self, name):
        """Build a merged performer record for `name`.

        For each configured field, walks the field's scraper priority list,
        running each scraper at most once (full results are cached per
        scraper) and taking the field from the first scraper that provides it.
        """
        ret = {"name": name}

        scraper_cache = {}

        for field in self.config.keys():
            found = False
            for s in self.config[field]:
                # BUG FIX: the original cache branch had no `not found` guard,
                # so a lower-priority scraper already in the cache could
                # overwrite a field taken from a higher-priority one,
                # inverting the documented priority order.
                if s in scraper_cache and not found:
                    if field in scraper_cache[s]:
                        ret[field] = scraper_cache[s][field]
                        print("updating field from cache using scraper: " + s + " for field: " + field, file=sys.stderr)
                        found = True
                if s not in scraper_cache and not found:
                    print("Running scraper: " + s + " " + field, file=sys.stderr)
                    spl = self.scrape_performer_list(s, name)
                    if spl is not None:
                        for spli in spl:
                            # Only scrape the candidate whose name matches
                            # (case-insensitively); stop at the first match.
                            if spli["name"].lower() == name.lower():
                                r = self.scrape_performer(s, {"name": spli["name"], "url": spli["url"]})
                                if r is not None:
                                    scraper_cache[s] = r
                                    found = True
                                break
                        if found:
                            print("Saving results from scraper: " + field + " " + s, file=sys.stderr)
                            if field in scraper_cache[s]:
                                ret[field] = scraper_cache[s][field]
                            else:
                                found = False
                    else:
                        # Cache an empty result so a scraper that returned
                        # nothing is not re-run for later fields.
                        scraper_cache[s] = {}
        return ret
|
||||
|
||||
|
||||
|
||||
# Script entry point.  stash invokes this file with a mode argument
# ("query" for performerByName, "fetch" for performerByFragment) and a JSON
# fragment on stdin; results are printed as JSON on stdout, diagnostics on
# stderr.  Guarded with __main__ so importing the module has no side effects,
# and sys.argv is length-checked (the original raised IndexError when run
# with no argument).
if __name__ == "__main__":
    mode = sys.argv[1] if len(sys.argv) > 1 else ""

    if mode == "query":
        fragment = json.loads(sys.stdin.read())
        print("input: " + json.dumps(fragment), file=sys.stderr)
        scraper = multiscrape()
        result = scraper.query_performers(fragment['name'])
        if not result:
            print(f"Could not determine details for performer: `{fragment['name']}`", file=sys.stderr)
            # Empty JSON object tells stash there is no result.
            print("{}")
        else:
            print(json.dumps(result))

    elif mode == "fetch":
        fragment = json.loads(sys.stdin.read())
        print("input: " + json.dumps(fragment), file=sys.stderr)
        scraper = multiscrape()
        result = scraper.fetch_performer(fragment['name'])
        if not result:
            print(f"Could not determine details for performer: `{fragment['name']}`", file=sys.stderr)
            print("{}")
        else:
            # (Removed a stray no-op `True` statement from the original.)
            print(json.dumps(result))

    elif mode == "test":
        # Debug helper: list every scraper id the config references.
        scraper = multiscrape()
        scrapers = scraper.requred_scrapers()
        print(scrapers)
|
||||
|
||||
|
||||
17
stash/config/scrapers/community/multiscrape/multiscrape.yml
Normal file
17
stash/config/scrapers/community/multiscrape/multiscrape.yml
Normal file
@@ -0,0 +1,17 @@
|
||||
name: multiscrape

# Full scrape of a single performer: runs multiscrape.py in "fetch" mode,
# which merges fields from multiple scrapers per its priority config.
performerByFragment:
  action: script
  script:
    - python
    - multiscrape.py
    - fetch

# Name search: runs multiscrape.py in "query" mode, which returns the
# de-duplicated union of candidates from all configured scrapers.
performerByName:
  action: script
  script:
    - python
    - multiscrape.py
    - query

# Last Updated December 16, 2021
|
||||
Reference in New Issue
Block a user