stash
This commit is contained in:
145
stash/config/scrapers/community/TheScoreGroup/TheScoreGroup.py
Normal file
145
stash/config/scrapers/community/TheScoreGroup/TheScoreGroup.py
Normal file
@@ -0,0 +1,145 @@
|
||||
import sys
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import requests
|
||||
import re
|
||||
|
||||
# To import from a parent directory we must add that directory to the
# module search path before the py_common import below runs.
csd = os.path.dirname(os.path.realpath(__file__))  # current script directory
parent = os.path.dirname(csd)  # parent directory (should be the scrapers one)
# Add the parent dir to sys.path so that py_common can be imported from there.
sys.path.append(parent)
|
||||
|
||||
try:
    from lxml import html
except ModuleNotFoundError:
    # lxml is a third-party dependency; tell the user how to install it.
    print(
        "You need to install the lxml module. (https://lxml.de/installation.html#installation)",
        file=sys.stderr,
    )
    print(
        "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install lxml",
        file=sys.stderr,
    )
    # Exit non-zero: bare sys.exit() reported success (status 0) on a fatal
    # missing-dependency error, which hides the failure from callers.
    sys.exit(1)
|
||||
|
||||
try:
    import py_common.log as log
except ModuleNotFoundError:
    # py_common lives in the parent scrapers directory added to sys.path above.
    print(
        "You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)",
        file=sys.stderr,
    )
    # Exit non-zero: bare sys.exit() reported success (status 0) on a fatal
    # missing-dependency error, which hides the failure from callers.
    sys.exit(1)
|
||||
|
||||
# Shared client because we're making multiple requests; a Session reuses the
# underlying connection across the search POST and the per-performer HEAD checks.
client = requests.Session()
|
||||
|
||||
|
||||
# Example element:
|
||||
# <div class="li-item model h-100 ">
|
||||
# <div class="box pos-rel d-flex flex-column h-100">
|
||||
# <div class="item-img pos-rel">
|
||||
# <a href="https://www.scoreland.com/big-boob-models/no-model/0/?nats=MTAwNC4yLjIuMi41NDUuMC4wLjAuMA"
|
||||
# class="d-block"
|
||||
# title=" Scoreland Profile">
|
||||
# <img src="https://cdn77.scoreuniverse.com/shared-bits/images/male-model-placeholder-photo.jpg" />
|
||||
# </a>
|
||||
# </div>
|
||||
# <div class="info t-c p-2">
|
||||
# <div class="t-trunc t-uc">
|
||||
# <a href="https://www.scoreland.com/big-boob-models/no-model/0/?nats=MTAwNC4yLjIuMi41NDUuMC4wLjAuMA"
|
||||
# title=""
|
||||
# aria-label=" Scoreland Profile"
|
||||
# class="i-model accent-text">
|
||||
# </a>
|
||||
# </div>
|
||||
# </div>
|
||||
# </div>
|
||||
# </div>
|
||||
def map_performer(el):
    """Turn one search-result element into a performer dict, or None to skip it.

    Skips placeholder results (links containing "no-model") and performers
    whose rewritten profile URL does not answer a HEAD request with 200.
    """
    profile_url = el.xpath(".//a/@href")[0]
    # Placeholder entries point at a "no-model" page; there is no performer.
    if "no-model" in profile_url:
        return None

    performer_name = el.xpath(".//a/@title")[1]
    portrait = el.xpath(".//img/@src")[0]
    # Rewrite the tracking-style URL (…<site>/2/0/<path>?…) into a direct
    # https://www.<site>.com/<path> link.
    fixed_url = re.sub(
        r".*?([^/]*(?=/2/0))/2/0/([^?]*)", r"https://www.\1.com/\2", profile_url
    )

    # Drop performers whose profile link is dead so Stash doesn't store it.
    if client.head(fixed_url).status_code != 200:
        log.debug(f"Performer '{performer_name}' has a broken profile link, skipping")
        return None

    return {
        "name": performer_name,
        "url": fixed_url,
        "image": portrait,
    }
|
||||
|
||||
|
||||
def performer_query(query: str):
    """Search scoreland.com for models matching *query*.

    Returns a list of performer dicts (possibly empty); logs a warning
    when nothing matched.
    """
    # Form data to be sent as the POST request body; mirrors the site's
    # search form, with every filter left wide open except the keywords.
    form_data = {
        "ci_csrf_token": "",
        "keywords": query,
        "s_filters[site]": "all",
        "s_filters[type]": "models",
        "m_filters[sort]": "top_rated",
        "m_filters[gender]": "any",
        "m_filters[body_type]": "any",
        "m_filters[race]": "any",
        "m_filters[hair_color]": "any",
    }
    response = client.post("https://www.scoreland.com/search-es/", data=form_data)
    tree = html.fromstring(response.content)

    performers = []
    for element in tree.find_class("model"):
        mapped = map_performer(element)
        if mapped:
            performers.append(mapped)

    if not performers:
        log.warning(f"No performers found for '{query}'")
    return performers
|
||||
|
||||
|
||||
def main():
    """CLI entry point.

    Parses the subcommand from argv, merges any JSON fragment piped on
    stdin (how Stash invokes scrapers) over the parsed arguments, runs the
    requested operation, and prints the JSON result to stdout.
    """
    parser = argparse.ArgumentParser("ScoreGroup Scraper", argument_default="")
    subparsers = parser.add_subparsers(
        dest="operation", help="Operation to perform", required=True
    )
    subparsers.add_parser("search", help="Search for performers").add_argument(
        "name", nargs="?", help="Name to search for"
    )

    # With no arguments at all, show usage instead of a terse argparse error.
    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)

    args = parser.parse_args()
    log.debug(f"Arguments from commandline: {args}")
    # Script is being piped into, probably by Stash: stdin carries a JSON
    # fragment whose keys override the command-line arguments.
    if not sys.stdin.isatty():
        try:
            frag = json.load(sys.stdin)
            args.__dict__.update(frag)
            log.debug(f"With arguments from stdin: {args}")
        except json.decoder.JSONDecodeError:
            log.error("Received invalid JSON from stdin")
            sys.exit(1)

    if args.operation == "search":
        name: str = args.name
        if not name:
            log.error("No query provided")
            sys.exit(1)
        log.debug(f"Searching for '{name}'")
        matches = performer_query(name)
        print(json.dumps(matches))
        sys.exit(0)

    # Just in case the above if statement doesn't trigger somehow.
    # Something has gone quite wrong should this ever get hit.
    log.error("An error has occurred")  # fixed message typo: "occured"
    sys.exit(2)
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|
||||
249
stash/config/scrapers/community/TheScoreGroup/TheScoreGroup.yml
Normal file
249
stash/config/scrapers/community/TheScoreGroup/TheScoreGroup.yml
Normal file
@@ -0,0 +1,249 @@
|
||||
name: TheScoreGroup
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url: &urls
|
||||
- 18eighteen.com
|
||||
- 40somethingmag.com
|
||||
- 50plusmilfs.com
|
||||
- 60plusmilfs.com
|
||||
- bigboobspov.com
|
||||
- bigtitangelawhite.com
|
||||
- bigtithitomi.com
|
||||
- bigtithooker.com
|
||||
- bigtitterrynova.com
|
||||
- bigtitvenera.com
|
||||
- bonedathome.com
|
||||
- bootyliciousmag.com
|
||||
- bustyangelique.com
|
||||
- bustyarianna.com
|
||||
- bustydanniashe.com
|
||||
- bustydustystash.com
|
||||
- bustyinescudna.com
|
||||
- bustykellykay.com
|
||||
- bustykerrymarie.com
|
||||
- bustylornamorga.com
|
||||
- bustymerilyn.com
|
||||
- bustyoldsluts.com
|
||||
- chloesworld.com
|
||||
- christymarks.com
|
||||
- cock4stepmom.com
|
||||
- creampieforgranny.com
|
||||
- crystalgunnsworld.com
|
||||
- daylenerio.com
|
||||
- desiraesworld.com
|
||||
- evanottyvideos.com
|
||||
- feedherfuckher.com
|
||||
- flatandfuckedmilfs.com
|
||||
- homealonemilfs.com
|
||||
- karinahart.com
|
||||
- legsex.com
|
||||
- mickybells.com
|
||||
- milftugs.com
|
||||
- mommystoytime.com
|
||||
- naughtymag.com
|
||||
- pickinguppussy.com
|
||||
- pornmegaload.com
|
||||
- reneerossvideos.com
|
||||
- scoreclassics.com
|
||||
- scoreland.com
|
||||
- scoreland2.com
|
||||
- scorevideos.com
|
||||
- sharizelvideos.com
|
||||
- stacyvandenbergboobs.com
|
||||
- tawny-peaks.com
|
||||
- titsandtugs.com
|
||||
- valoryirene.com
|
||||
- xlgirls.com
|
||||
- yourwifemymeat.com
|
||||
scraper: sceneScraper
|
||||
galleryByURL:
|
||||
- action: scrapeXPath
|
||||
url: *urls
|
||||
scraper: galleryScraper
|
||||
performerByURL:
|
||||
- action: scrapeXPath
|
||||
url: *urls
|
||||
scraper: performerScraper
|
||||
performerByName:
|
||||
action: script
|
||||
script:
|
||||
- python
|
||||
- TheScoreGroup.py
|
||||
- search
|
||||
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
common:
|
||||
$url: //link[@rel="canonical"]/@href
|
||||
$videopage: //section[@id="videos_page-page" or @id="mixed_page-page"]
|
||||
$stat: //div[contains(concat(' ',normalize-space(@class),' '),' mb-3 ')]
|
||||
scene:
|
||||
Title: $videopage//h1
|
||||
# Original studio is determinable by looking at the CDN links (<source src="//cdn77.scoreuniverse.com/naughtymag/scenes...) this helps set studio for Pornmegaload scenes as nothing is released directly from PML."
|
||||
Studio:
|
||||
Name:
|
||||
selector: ($videopage//video/source/@src)[1]
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: .*\.com/(.+?)\/(video|scene).*
|
||||
with: $1
|
||||
- map: &studioMap
|
||||
18eighteen: 18 Eighteen
|
||||
40somethingmag: 40 Something Mag
|
||||
50plusmilfs: 50 Plus MILFs
|
||||
60plusmilfs: 60 Plus MILFs
|
||||
bigboobspov: Big Boobs POV
|
||||
bigtitangelawhite: Big Tit Angela White
|
||||
bigtithitomi: Big Tit Hitomi
|
||||
bigtithooker: Big Tit Hooker
|
||||
bigtitterrynova: Big Tit Terry Nova
|
||||
bigtitvenera: Big Tit Venera
|
||||
bonedathome: Boned At Home
|
||||
bootyliciousmag: Bootylicious Mag
|
||||
bustyangelique: Busty Angelique
|
||||
bustyarianna: Busty Arianna
|
||||
bustydanniashe: Busty Danni Ashe
|
||||
bustydustystash: Busty Dusty Stash
|
||||
bustyinescudna: Busty Ines Cudna
|
||||
bustykellykay: Busty Kelly Kay
|
||||
bustykerrymarie: Busty Kerry Marie
|
||||
bustylornamorga: Busty Lorna Morga
|
||||
bustymerilyn: Busty Merilyn
|
||||
bustyoldsluts: Busty Old Sluts
|
||||
chloesworld: Chloe's World
|
||||
christymarks: Christy Marks
|
||||
cock4stepmom: Cock 4 Stepmom
|
||||
creampieforgranny: Creampie for Granny
|
||||
crystalgunnsworld: Crystal Gunns World
|
||||
daylenerio: Daylene Rio
|
||||
desiraesworld: Desiraes World
|
||||
evanottyvideos: Eva Notty Videos
|
||||
feedherfuckher: Feed Her Fuck Her
|
||||
flatandfuckedmilfs: Flat and Fucked MILFs
|
||||
homealonemilfs: Home Alone MILFs
|
||||
karinahart: Karina Hart
|
||||
legsex: Leg Sex
|
||||
mickybells: Micky Bells
|
||||
milftugs: MILF Tugs
|
||||
mommystoytime: Mommy's Toy Time
|
||||
naughtymag: Naughty Mag
|
||||
pickinguppussy: Picking Up Pussy
|
||||
pornmegaload: Porn Mega Load
|
||||
reneerossvideos: Renee Ross Video
|
||||
scoreclassics: Score Classics
|
||||
# Scoreland2 needs to come before scoreland because it's more specific
|
||||
scoreland2: Scoreland2
|
||||
scoreland: Scoreland
|
||||
scorevideos: Score Videos
|
||||
sharizelvideos: Sha Rizel Videos
|
||||
stacyvandenbergboobs: Stacy Vandenberg Boobs
|
||||
tawny-peaks: Tawny Peaks
|
||||
titsandtugs: Tits And Tugs
|
||||
valoryirene: Valory Irene
|
||||
xlgirls: XL Girls
|
||||
yourwifemymeat: Your Wife My Meat
|
||||
Date: &dateAttr
|
||||
selector: $videopage$stat//span[contains(.,"Date:")]/following-sibling::span
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ..,
|
||||
with:
|
||||
- parseDate: January 2 2006
|
||||
Details: &details
|
||||
selector: $videopage//div[@class="p-desc p-3"]/text()
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: Read More »
|
||||
with:
|
||||
# Attempt to fix erroneous line breaks where HTML tags existed
|
||||
- regex: \n\n([0-9a-zA-Z\.]+)\n\n
|
||||
with: " $1 "
|
||||
concat: "\n\n"
|
||||
Tags:
|
||||
Name: $videopage//a[contains(@href, "videos-tag") or contains(@href, "scenes-tag")]
|
||||
Performers: &performersAttr
|
||||
Name: $videopage//span[contains(.,"Featuring:")]/following-sibling::span/a
|
||||
Image:
|
||||
# This selector scrapes the canonical scene page cover image
|
||||
selector: //script[contains(text(), "poster")]
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ^.+poster.+'(.+jpg)'.+$
|
||||
with: $1
|
||||
- regex: ^//
|
||||
with: https://
|
||||
# This Selector scrapes the image posted on social media sites
|
||||
# selector: //meta[@itemprop="image"]/@content
|
||||
# Enable this post process if you want better image quality but sometimes it can fail
|
||||
# postProcess:
|
||||
# - replace:
|
||||
# - regex: _lg
|
||||
# with: _x_800
|
||||
URL: &urlAttr
|
||||
selector: $url
|
||||
Code: &codeAttr
|
||||
selector: $url
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: .*\/(\d+)\/?$
|
||||
with: $1
|
||||
galleryScraper:
|
||||
common:
|
||||
$photopage: //section[@id='photos_page-page']
|
||||
gallery:
|
||||
Title: //h1
|
||||
Studio:
|
||||
Name:
|
||||
selector: //link[@rel="canonical"]/@href
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ^(https://)?.+?([^\.]+)\.com/.*
|
||||
with: $2
|
||||
- map: *studioMap
|
||||
Date:
|
||||
selector: //div[span[@class="label" and contains(.,"Date")]]/span[@class="value"]/text()
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: (\d+)(st|nd|rd|th)
|
||||
with: $1
|
||||
- parseDate: January 2, 2006
|
||||
Details:
|
||||
selector: $photopage//div[contains(@class, 'p-desc')]/text()
|
||||
concat: "\n"
|
||||
Tags:
|
||||
Name: //h3/following-sibling::a[not(contains(@class,"btn-1"))]/text()
|
||||
Performers:
|
||||
Name: //div[span[@class="label" and contains(.,"Featuring")]]/span[@class="value"]//a/text()
|
||||
|
||||
performerScraper:
|
||||
performer:
|
||||
Name: //h1[@class="model-title"]
|
||||
Gender:
|
||||
fixed: Female
|
||||
URL: //meta[@property="og:url"]/@content
|
||||
Ethnicity:
|
||||
selector: //span[text()="Ethnicity:"]/following-sibling::span
|
||||
postProcess:
|
||||
- map:
|
||||
White: Caucasian
|
||||
HairColor: //span[text()="Hair Color:"]/following-sibling::span
|
||||
Height:
|
||||
selector: //span[text()="Height:"]/following-sibling::span
|
||||
postProcess:
|
||||
- feetToCm: true
|
||||
Weight:
|
||||
selector: //span[text()="Weight:"]/following-sibling::span
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: (\d+).*
|
||||
with: $1
|
||||
- lbToKg: true
|
||||
Measurements:
|
||||
selector: //span[text()="Bra Size:"]/following-sibling::span|//span[text()="Measurements:"]/following-sibling::span
|
||||
concat: "-"
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: (\d+[a-zA-Z]{1,3})-\d+(-\d+-\d+)
|
||||
with: $1$2
|
||||
Image: //section[@id="model-page"]//img[@class="lazyload"]/@src
|
||||
# Last Updated April 08, 2024
|
||||
10
stash/config/scrapers/community/TheScoreGroup/manifest
Executable file
10
stash/config/scrapers/community/TheScoreGroup/manifest
Executable file
@@ -0,0 +1,10 @@
|
||||
id: TheScoreGroup
|
||||
name: TheScoreGroup
|
||||
metadata: {}
|
||||
version: 2cd09ad
|
||||
date: "2024-04-08 00:18:22"
|
||||
requires: []
|
||||
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
|
||||
files:
|
||||
- TheScoreGroup.yml
|
||||
- TheScoreGroup.py
|
||||
Reference in New Issue
Block a user