This commit is contained in:
Christoph Califice
2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions

View File

@@ -0,0 +1,185 @@
import base64
import json
import os
import re
import sys
from datetime import datetime

# to import from a parent directory we need to add that directory to the system path
csd = os.path.dirname(
    os.path.realpath(__file__))  # get current script directory
parent = os.path.dirname(csd)  # parent directory (should be the scrapers one)
sys.path.append(
    parent
)  # add parent dir to sys path so that we can import py_common from there

# py_common is shared scraper infrastructure; bail out with a hint if missing.
try:
    import py_common.log as log
except ModuleNotFoundError:
    print(
        "You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)",
        file=sys.stderr,
    )
    sys.exit()

# requests is a third-party dependency; exit with install instructions if absent.
try:
    import requests
except ModuleNotFoundError:
    log.error(
        "You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)"
    )
    log.error(
        "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests"
    )
    sys.exit()

# Proxy mapping applied to every request made by the scraper session (empty = direct).
PROXIES = {}
# Per-request timeout in seconds for all HTTP calls.
TIMEOUT = 10
class Redgifs:
    """Scraper for redgifs.com using the public v2 API.

    Constructing an instance opens a requests session and immediately fetches
    a temporary auth token, so construction performs network I/O.
    """

    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update(
            {"content-type": "application/json; charset=UTF-8"}
        )
        self.session.proxies.update(PROXIES)
        self.getTemporaryToken()

    def log_session_headers(self):
        """Debug helper: dump the current session headers to the scraper log."""
        log.debug(self.session.headers)

    def _GET(self, url):
        """GET `url`; return the Response, or None on any network/HTTP failure.

        Shared backend for GET_req / GET_req_json (they were duplicates).
        Catches only requests' own exceptions rather than a bare `except:`,
        so programming errors still surface.
        """
        try:
            resp = self.session.get(url, timeout=TIMEOUT)
        except requests.exceptions.RequestException as e:
            log.error(f"scrape error: {e}")
            return None
        if resp.status_code >= 400:
            log.error(f"HTTP Error: {resp.status_code}")
            return None
        return resp

    def GET_req(self, url):
        """GET `url` and return the raw body (bytes), or None on failure."""
        resp = self._GET(url)
        return None if resp is None else resp.content

    def GET_req_json(self, url):
        """GET `url` and return the parsed JSON body, or None on failure."""
        resp = self._GET(url)
        return None if resp is None else resp.json()

    def output_json(self, title, tags, url, b64img, performers, date):
        """Assemble the scraped-scene dict in the shape the caller expects.

        `b64img` is the base64-encoded poster image as bytes; performer
        names are whitespace-stripped.
        """
        return {
            "title": title,
            "tags": [{"name": x} for x in tags],
            "url": url,
            "image": "data:image/jpeg;base64," + b64img.decode("utf-8"),
            "performers": [{"name": x.strip()} for x in performers],
            "date": date
        }

    def getTemporaryToken(self):
        """Fetch a temporary API token and install it as a Bearer auth header.

        Logs an error and leaves the session unauthenticated if the token
        endpoint fails (the original crashed with AttributeError/TypeError
        in that case).
        """
        req = self.GET_req_json("https://api.redgifs.com/v2/auth/temporary")
        if not req or not req.get("token"):
            log.error("Could not fetch a temporary API token")
            return
        self.session.headers.update(
            {"Authorization": 'Bearer ' + req["token"]}
        )
        log.debug(req)

    def getIdFromUrl(self, url):
        """Extract the gif id from a watch URL: last path segment, query stripped."""
        return url.split("/")[-1].split("?")[0]

    def getApiUrlFromId(self, id):
        """Build the v2 API URL for a gif id."""
        return f"https://api.redgifs.com/v2/gifs/{id}?views=yes&users=yes"

    def getParseUrl(self, url):
        """Scrape a scene given a watch-page URL."""
        return self.getParseId(self.getIdFromUrl(url))

    def getParseId(self, id):
        """Scrape a scene by gif id and return the scraped-scene dict."""
        log.debug(str(id))
        apiurl = self.getApiUrlFromId(id.lower())  # API expects lowercase ids
        req = self.GET_req_json(apiurl)
        log.debug(req)
        gif = req.get("gif")
        user = req.get("user") or {}  # user block may be absent in the response
        tags = gif.get("tags")
        # createDate is a unix timestamp; render as YYYY-MM-DD
        date = str(datetime.fromtimestamp(gif.get("createDate")).date())
        img = self.GET_req(gif.get("urls").get("poster"))
        b64img = base64.b64encode(img)
        # prefer the display name, fall back to the username
        performers = []
        if user.get("name"):
            performers = [user.get("name")]
        elif user.get("username"):
            performers = [user.get("username")]
        return self.output_json(
            id, tags, f"https://www.redgifs.com/watch/{id}", b64img, performers, date
        )
def parseFilename(filename):
    """Derive a redgifs gif id from a media filename.

    Drops any 'redgifs_' prefix and everything from the first dot onward
    (the file extension).
    """
    stem = filename.replace("redgifs_", "")
    return stem.partition(".")[0]
# Script entry point: Stash passes the scrape mode as argv[1] and a JSON
# fragment on stdin; the scraped scene is printed as JSON on stdout.
FRAGMENT = json.loads(sys.stdin.read())
log.debug(FRAGMENT)
scraper = Redgifs()
result = ""
mode = sys.argv[1]
if mode == "url":
    target_url = FRAGMENT.get("url")
    log.debug(target_url)
    result = json.dumps(scraper.getParseUrl(target_url))
elif mode in ("queryFragment", "fragment"):
    # the gif id is recovered from the scene title (original filename)
    gif_id = parseFilename(FRAGMENT.get("title"))
    log.debug(gif_id)
    result = json.dumps(scraper.getParseId(gif_id))
elif mode == "name":
    # name search returns a list of candidate scenes (here: exactly one)
    gif_id = parseFilename(FRAGMENT.get("name"))
    log.debug(gif_id)
    result = json.dumps([scraper.getParseId(gif_id)])
print(result)