stash

2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions
--- a/stash/config/scrapers/community/JacquieEtMichelTV/JacquieEtMichelTV.py
+++ b/stash/config/scrapers/community/JacquieEtMichelTV/JacquieEtMichelTV.py
@@ -0,0 +1,98 @@
+import base64
+import datetime
+import json
+import string
+import sys
+from urllib.parse import urlparse
+# extra modules below need to be installed
+try:
+    import cloudscraper
+except ModuleNotFoundError:
+    print("You need to install the cloudscraper module. (https://pypi.org/project/cloudscraper/)", file=sys.stderr)
+    print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install cloudscraper", file=sys.stderr)
+    sys.exit()
+
+try:
+    from lxml import html
+except ModuleNotFoundError:
+    print("You need to install the lxml module. (https://lxml.de/installation.html#installation)", file=sys.stderr)
+    print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install lxml", file=sys.stderr)
+    sys.exit()
+
+try:
+    import py_common.graphql as graphql
+    import py_common.log as log
+except ModuleNotFoundError:
+    print("You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr)
+    sys.exit()
+
+lang = 'en'
+
+if len(sys.argv) > 1:
+    if sys.argv[1] == 'fr':
+        lang = 'fr'
+
+frag = json.loads(sys.stdin.read())
+if not frag['url']:
+    log.error('No URL entered.')
+    sys.exit(1)
+
+url = frag["url"]
+scraper = cloudscraper.create_scraper()
+try:
+    cookies = {'lang': lang}
+    scraped = scraper.get(url, cookies=cookies)
+except:
+    log.error("scrape error")
+    sys.exit(1)
+
+if scraped.status_code >= 400:
+    log.error(f'HTTP Error: {scraped.status_code}')
+    sys.exit(1)
+
+tree = html.fromstring(scraped.text)
+
+title = None
+title_res = tree.xpath("//h1/text()")
+if title_res:
+    title = title_res[0]
+date = None
+dt = tree.xpath("//span[@class='video-detail__date']/text()")
+if dt:
+    f, *m, l = dt[0].split()
+    log.debug(f"found date: {l}")
+    if l:
+        if lang == 'fr':
+            date = datetime.datetime.strptime(l,
+                                              "%d/%m/%Y").strftime("%Y-%m-%d")
+        else:
+            # en
+            date = datetime.datetime.strptime(l,
+                                              "%m/%d/%Y").strftime("%Y-%m-%d")
+desc = tree.xpath("//meta[@property='og:description']/@content")
+details = ""
+if desc:
+    details = desc[0]
+tags = tree.xpath("//a[@class='video-detail__tag-list__link']/text()")
+imgurl_res = tree.xpath("//video[@id='video-player']/@poster")
+datauri = None
+if imgurl_res:
+    imgurl = imgurl_res[0]
+    img = scraper.get(imgurl).content
+    b64img = base64.b64encode(img)
+    datauri = "data:image/jpeg;base64,"
+
+ret = {
+    'title': title,
+    'tags': [{
+        'name': x.strip()
+    } for x in tags],
+    'date': date,
+    'details': details,
+    'image': datauri + b64img.decode('utf-8'),
+    'studio': {
+        'name': 'Jacquie Et Michel TV'
+    },
+}
+
+print(json.dumps(ret))