stash

2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions
--- a/stash/config/scrapers/community/TheScoreGroup/TheScoreGroup.py
+++ b/stash/config/scrapers/community/TheScoreGroup/TheScoreGroup.py
@@ -0,0 +1,145 @@
+import sys
+import argparse
+import json
+import os
+import requests
+import re
+
+# to import from a parent directory we need to add that directory to the system path
+csd = os.path.dirname(os.path.realpath(__file__))  # get current script directory
+parent = os.path.dirname(csd)  #  parent directory (should be the scrapers one)
+sys.path.append(
+    parent
+)  # add parent dir to sys path so that we can import py_common from ther
+
+try:
+    from lxml import html
+except ModuleNotFoundError:
+    print(
+        "You need to install the lxml module. (https://lxml.de/installation.html#installation)",
+        file=sys.stderr,
+    )
+    print(
+        "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install lxml",
+        file=sys.stderr,
+    )
+    sys.exit()
+
+try:
+    import py_common.log as log
+except ModuleNotFoundError:
+    print(
+        "You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)",
+        file=sys.stderr,
+    )
+    sys.exit()
+
+# Shared client because we're making multiple requests
+client = requests.Session()
+
+
+# Example element:
+# <div class="li-item model h-100 ">
+#   <div class="box pos-rel d-flex flex-column h-100">
+#     <div class="item-img pos-rel">
+#       <a href="https://www.scoreland.com/big-boob-models/no-model/0/?nats=MTAwNC4yLjIuMi41NDUuMC4wLjAuMA"
+#          class="d-block"
+#          title=" Scoreland Profile">
+#         <img src="https://cdn77.scoreuniverse.com/shared-bits/images/male-model-placeholder-photo.jpg" />
+#       </a>
+#     </div>
+#     <div class="info t-c p-2">
+#       <div class="t-trunc t-uc">
+#         <a href="https://www.scoreland.com/big-boob-models/no-model/0/?nats=MTAwNC4yLjIuMi41NDUuMC4wLjAuMA"
+#            title=""
+#            aria-label=" Scoreland Profile"
+#            class="i-model accent-text">
+#         </a>
+#       </div>
+#     </div>
+#   </div>
+# </div>
+def map_performer(el):
+    url = el.xpath(".//a/@href")[0]
+    if "no-model" in url:
+        return None
+    name = el.xpath(".//a/@title")[1]
+    image = el.xpath(".//img/@src")[0]
+    fixed_url = re.sub(r".*?([^/]*(?=/2/0))/2/0/([^?]*)", r"https://www.\1.com/\2", url)
+
+    if client.head(fixed_url).status_code != 200:
+        log.debug(f"Performer '{name}' has a broken profile link, skipping")
+        return None
+
+    return {
+        "name": name,
+        "url": fixed_url,
+        "image": image,
+    }
+
+
+def performer_query(query: str):
+    # Form data to be sent as the POST request body
+    payload = {
+        "ci_csrf_token": "",
+        "keywords": query,
+        "s_filters[site]": "all",
+        "s_filters[type]": "models",
+        "m_filters[sort]": "top_rated",
+        "m_filters[gender]": "any",
+        "m_filters[body_type]": "any",
+        "m_filters[race]": "any",
+        "m_filters[hair_color]": "any",
+    }
+    result = client.post("https://www.scoreland.com/search-es/", data=payload)
+    tree = html.fromstring(result.content)
+    performers = [p for x in tree.find_class("model") if (p := map_performer(x))]
+
+    if not performers:
+        log.warning(f"No performers found for '{query}'")
+    return performers
+
+
+def main():
+    parser = argparse.ArgumentParser("ScoreGroup Scraper", argument_default="")
+    subparsers = parser.add_subparsers(
+        dest="operation", help="Operation to perform", required=True
+    )
+    subparsers.add_parser("search", help="Search for performers").add_argument(
+        "name", nargs="?", help="Name to search for"
+    )
+
+    if len(sys.argv) == 1:
+        parser.print_help(sys.stderr)
+        sys.exit(1)
+
+    args = parser.parse_args()
+    log.debug(f"Arguments from commandline: {args}")
+    # Script is being piped into, probably by Stash
+    if not sys.stdin.isatty():
+        try:
+            frag = json.load(sys.stdin)
+            args.__dict__.update(frag)
+            log.debug(f"With arguments from stdin: {args}")
+        except json.decoder.JSONDecodeError:
+            log.error("Received invalid JSON from stdin")
+            sys.exit(1)
+
+    if args.operation == "search":
+        name: str = args.name
+        if not name:
+            log.error("No query provided")
+            sys.exit(1)
+        log.debug(f"Searching for '{name}'")
+        matches = performer_query(name)
+        print(json.dumps(matches))
+        sys.exit(0)
+
+    # Just in case the above if statement doesn't trigger somehow
+    # Something has gone quite wrong should this ever get hit
+    log.error("An error has occured")
+    sys.exit(2)
+
+
+if __name__ == "__main__":
+    main()
--- a/stash/config/scrapers/community/TheScoreGroup/TheScoreGroup.yml
+++ b/stash/config/scrapers/community/TheScoreGroup/TheScoreGroup.yml
@@ -0,0 +1,249 @@
+name: TheScoreGroup
+sceneByURL:
+  - action: scrapeXPath
+    url: &urls
+      - 18eighteen.com
+      - 40somethingmag.com
+      - 50plusmilfs.com
+      - 60plusmilfs.com
+      - bigboobspov.com
+      - bigtitangelawhite.com
+      - bigtithitomi.com
+      - bigtithooker.com
+      - bigtitterrynova.com
+      - bigtitvenera.com
+      - bonedathome.com
+      - bootyliciousmag.com
+      - bustyangelique.com
+      - bustyarianna.com
+      - bustydanniashe.com
+      - bustydustystash.com
+      - bustyinescudna.com
+      - bustykellykay.com
+      - bustykerrymarie.com
+      - bustylornamorga.com
+      - bustymerilyn.com
+      - bustyoldsluts.com
+      - chloesworld.com
+      - christymarks.com
+      - cock4stepmom.com
+      - creampieforgranny.com
+      - crystalgunnsworld.com
+      - daylenerio.com
+      - desiraesworld.com
+      - evanottyvideos.com
+      - feedherfuckher.com
+      - flatandfuckedmilfs.com
+      - homealonemilfs.com
+      - karinahart.com
+      - legsex.com
+      - mickybells.com
+      - milftugs.com
+      - mommystoytime.com
+      - naughtymag.com
+      - pickinguppussy.com
+      - pornmegaload.com
+      - reneerossvideos.com
+      - scoreclassics.com
+      - scoreland.com
+      - scoreland2.com
+      - scorevideos.com
+      - sharizelvideos.com
+      - stacyvandenbergboobs.com
+      - tawny-peaks.com
+      - titsandtugs.com
+      - valoryirene.com
+      - xlgirls.com
+      - yourwifemymeat.com
+    scraper: sceneScraper
+galleryByURL:
+  - action: scrapeXPath
+    url: *urls
+    scraper: galleryScraper
+performerByURL:
+  - action: scrapeXPath
+    url: *urls
+    scraper: performerScraper
+performerByName:
+  action: script
+  script:
+    - python
+    - TheScoreGroup.py
+    - search
+
+xPathScrapers:
+  sceneScraper:
+    common:
+      $url: //link[@rel="canonical"]/@href
+      $videopage: //section[@id="videos_page-page" or @id="mixed_page-page"]
+      $stat: //div[contains(concat(' ',normalize-space(@class),' '),' mb-3 ')]
+    scene:
+      Title: $videopage//h1
+      # The original studio can be determined from the CDN links (e.g. <source src="//cdn77.scoreuniverse.com/naughtymag/scenes...">). This sets the correct studio for Pornmegaload scenes, as nothing is released directly from PML.
+      Studio:
+        Name:
+          selector: ($videopage//video/source/@src)[1]
+          postProcess:
+            - replace:
+                - regex: .*\.com/(.+?)\/(video|scene).*
+                  with: $1
+            - map: &studioMap
+                18eighteen: 18 Eighteen
+                40somethingmag: 40 Something Mag
+                50plusmilfs: 50 Plus MILFs
+                60plusmilfs: 60 Plus MILFs
+                bigboobspov: Big Boobs POV
+                bigtitangelawhite: Big Tit Angela White
+                bigtithitomi: Big Tit Hitomi
+                bigtithooker: Big Tit Hooker
+                bigtitterrynova: Big Tit Terry Nova
+                bigtitvenera: Big Tit Venera
+                bonedathome: Boned At Home
+                bootyliciousmag: Bootylicious Mag
+                bustyangelique: Busty Angelique
+                bustyarianna: Busty Arianna
+                bustydanniashe: Busty Danni Ashe
+                bustydustystash: Busty Dusty Stash
+                bustyinescudna: Busty Ines Cudna
+                bustykellykay: Busty Kelly Kay
+                bustykerrymarie: Busty Kerry Marie
+                bustylornamorga: Busty Lorna Morga
+                bustymerilyn: Busty Merilyn
+                bustyoldsluts: Busty Old Sluts
+                chloesworld: Chloe's World
+                christymarks: Christy Marks
+                cock4stepmom: Cock 4 Stepmom
+                creampieforgranny: Creampie for Granny
+                crystalgunnsworld: Crystal Gunns World
+                daylenerio: Daylene Rio
+                desiraesworld: Desiraes World
+                evanottyvideos: Eva Notty Videos
+                feedherfuckher: Feed Her Fuck Her
+                flatandfuckedmilfs: Flat and Fucked MILFs
+                homealonemilfs: Home Alone MILFs
+                karinahart: Karina Hart
+                legsex: Leg Sex
+                mickybells: Micky Bells
+                milftugs: MILF Tugs
+                mommystoytime: Mommy's Toy Time
+                naughtymag: Naughty Mag
+                pickinguppussy: Picking Up Pussy
+                pornmegaload: Porn Mega Load
+                reneerossvideos: Renee Ross Video
+                scoreclassics: Score Classics
+                # Scoreland2 needs to come before scoreland because it's more specific
+                scoreland2: Scoreland2
+                scoreland: Scoreland
+                scorevideos: Score Videos
+                sharizelvideos: Sha Rizel Videos
+                stacyvandenbergboobs: Stacy Vandenberg Boobs
+                tawny-peaks: Tawny Peaks
+                titsandtugs: Tits And Tugs
+                valoryirene: Valory Irene
+                xlgirls: XL Girls
+                yourwifemymeat: Your Wife My Meat
+      Date: &dateAttr
+        selector: $videopage$stat//span[contains(.,"Date:")]/following-sibling::span
+        postProcess:
+          - replace:
+              - regex: ..,
+                with:
+          - parseDate: January 2 2006
+      Details: &details
+        selector: $videopage//div[@class="p-desc p-3"]/text()
+        postProcess:
+          - replace:
+              - regex: Read More »
+                with:
+              # Attempt to fix erroneous line breaks where HTML tags existed
+              - regex: \n\n([0-9a-zA-Z\.]+)\n\n
+                with: " $1 "
+        concat: "\n\n"
+      Tags:
+        Name: $videopage//a[contains(@href, "videos-tag") or contains(@href, "scenes-tag")]
+      Performers: &performersAttr
+        Name: $videopage//span[contains(.,"Featuring:")]/following-sibling::span/a
+      Image:
+        # This selector scrapes the canonical scene page cover image
+        selector: //script[contains(text(), "poster")]
+        postProcess:
+          - replace:
+              - regex: ^.+poster.+'(.+jpg)'.+$
+                with: $1
+              - regex: ^//
+                with: https://
+      # This Selector scrapes the image posted on social media sites
+      #        selector: //meta[@itemprop="image"]/@content
+      # Enable this post process if you want better image quality but sometimes it can fail
+      #          postProcess:
+      #            - replace:
+      #                - regex: _lg
+      #                  with: _x_800
+      URL: &urlAttr
+        selector: $url
+      Code: &codeAttr
+        selector: $url
+        postProcess:
+          - replace:
+              - regex: .*\/(\d+)\/?$
+                with: $1
+  galleryScraper:
+    common:
+      $photopage: //section[@id='photos_page-page']
+    gallery:
+      Title: //h1
+      Studio:
+        Name:
+          selector: //link[@rel="canonical"]/@href
+          postProcess:
+            - replace:
+                - regex: ^(https://)?.+?([^\.]+)\.com/.*
+                  with: $2
+            - map: *studioMap
+      Date:
+        selector: //div[span[@class="label" and contains(.,"Date")]]/span[@class="value"]/text()
+        postProcess:
+          - replace:
+              - regex: (\d+)(st|nd|rd|th)
+                with: $1
+          - parseDate: January 2, 2006
+      Details:
+        selector: $photopage//div[contains(@class, 'p-desc')]/text()
+        concat: "\n"
+      Tags:
+        Name: //h3/following-sibling::a[not(contains(@class,"btn-1"))]/text()
+      Performers:
+        Name: //div[span[@class="label" and contains(.,"Featuring")]]/span[@class="value"]//a/text()
+
+  performerScraper:
+    performer:
+      Name: //h1[@class="model-title"]
+      Gender:
+        fixed: Female
+      URL: //meta[@property="og:url"]/@content
+      Ethnicity:
+        selector: //span[text()="Ethnicity:"]/following-sibling::span
+        postProcess:
+          - map:
+              White: Caucasian
+      HairColor: //span[text()="Hair Color:"]/following-sibling::span
+      Height:
+        selector: //span[text()="Height:"]/following-sibling::span
+        postProcess:
+          - feetToCm: true
+      Weight:
+        selector: //span[text()="Weight:"]/following-sibling::span
+        postProcess:
+          - replace:
+              - regex: (\d+).*
+                with: $1
+          - lbToKg: true
+      Measurements:
+        selector: //span[text()="Bra Size:"]/following-sibling::span|//span[text()="Measurements:"]/following-sibling::span
+        concat: "-"
+        postProcess:
+          - replace:
+              - regex: (\d+[a-zA-Z]{1,3})-\d+(-\d+-\d+)
+                with: $1$2
+      Image: //section[@id="model-page"]//img[@class="lazyload"]/@src
+# Last Updated April 08, 2024
--- a/stash/config/scrapers/community/TheScoreGroup/manifest
+++ b/stash/config/scrapers/community/TheScoreGroup/manifest
@@ -0,0 +1,10 @@
+id: TheScoreGroup
+name: TheScoreGroup
+metadata: {}
+version: 2cd09ad
+date: "2024-04-08 00:18:22"
+requires: []
+source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
+files:
+- TheScoreGroup.yml
+- TheScoreGroup.py