This commit is contained in:
Christoph Califice
2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions

View File

@@ -0,0 +1,145 @@
import sys
import argparse
import json
import os
import requests
import re
# Module bootstrap: make the shared 'py_common' package importable, verify that
# the required dependencies are present and create the shared HTTP session.

# To import from a parent directory we need to add that directory to the system path
csd = os.path.dirname(os.path.realpath(__file__))  # get current script directory
parent = os.path.dirname(csd)  # parent directory (should be the scrapers one)
sys.path.append(
    parent
)  # add parent dir to sys path so that we can import py_common from there

try:
    from lxml import html
except ModuleNotFoundError:
    print(
        "You need to install the lxml module. (https://lxml.de/installation.html#installation)",
        file=sys.stderr,
    )
    print(
        "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install lxml",
        file=sys.stderr,
    )
    # A bare sys.exit() reports success (status 0); a missing dependency is a
    # failure, so signal it with a non-zero exit status.
    sys.exit(1)

try:
    import py_common.log as log
except ModuleNotFoundError:
    print(
        "You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)",
        file=sys.stderr,
    )
    # Same reasoning as above: exit with a failure status.
    sys.exit(1)

# Shared client because we're making multiple requests
client = requests.Session()
# Example element (a placeholder "no-model" entry, which map_performer skips):
# <div class="li-item model h-100 ">
# <div class="box pos-rel d-flex flex-column h-100">
# <div class="item-img pos-rel">
# <a href="https://www.scoreland.com/big-boob-models/no-model/0/?nats=MTAwNC4yLjIuMi41NDUuMC4wLjAuMA"
# class="d-block"
# title=" Scoreland Profile">
# <img src="https://cdn77.scoreuniverse.com/shared-bits/images/male-model-placeholder-photo.jpg" />
# </a>
# </div>
# <div class="info t-c p-2">
# <div class="t-trunc t-uc">
# <a href="https://www.scoreland.com/big-boob-models/no-model/0/?nats=MTAwNC4yLjIuMi41NDUuMC4wLjAuMA"
# title=""
# aria-label=" Scoreland Profile"
# class="i-model accent-text">
# </a>
# </div>
# </div>
# </div>
# </div>
def map_performer(el):
    """Convert one search-result element into a performer dict.

    Args:
        el: An element exposing ``xpath()`` (performer_query passes the
            elements returned by ``tree.find_class("model")``).

    Returns:
        A dict with the keys ``name`` and ``url`` (plus ``image`` when the
        element contains an ``<img>``), or ``None`` when the element is not a
        usable performer entry: no link, a "no-model" placeholder link, no
        second ``title`` attribute to read the name from, or a profile URL
        that does not answer a HEAD request with status 200.
    """
    hrefs = el.xpath(".//a/@href")
    # Elements without any link cannot be performer entries; placeholder
    # entries link to a "no-model" URL.
    if not hrefs or "no-model" in hrefs[0]:
        return None
    url = hrefs[0]

    # The name is read from the second anchor's title attribute, so at least
    # two title attributes are required.
    titles = el.xpath(".//a/@title")
    if len(titles) < 2:
        return None
    name = titles[1]

    images = el.xpath(".//img/@src")

    # Rewrite ".../<site>/2/0/<path>" into "https://www.<site>.com/<path>".
    fixed_url = re.sub(r".*?([^/]*(?=/2/0))/2/0/([^?]*)", r"https://www.\1.com/\2", url)

    try:
        # A timeout keeps one unresponsive profile from hanging the search.
        status = client.head(fixed_url, timeout=10).status_code
    except requests.RequestException as err:
        log.debug(f"Performer '{name}' profile link could not be checked ({err}), skipping")
        return None
    if status != 200:
        log.debug(f"Performer '{name}' has a broken profile link, skipping")
        return None

    performer = {
        "name": name,
        "url": fixed_url,
    }
    if images:
        performer["image"] = images[0]
    return performer
def performer_query(query: str):
    """Search the Scoreland site for performers matching ``query``.

    Posts the search form to https://www.scoreland.com/search-es/, parses the
    returned HTML and maps every element with the class ``model`` through
    ``map_performer``, keeping only the truthy results.

    Args:
        query: Text sent as the ``keywords`` form field.

    Returns:
        A list of performer dicts as produced by ``map_performer``. The list
        is empty when nothing matched or when the search request failed; both
        cases are logged.
    """
    # Form data to be sent as the POST request body
    payload = {
        "ci_csrf_token": "",
        "keywords": query,
        "s_filters[site]": "all",
        "s_filters[type]": "models",
        "m_filters[sort]": "top_rated",
        "m_filters[gender]": "any",
        "m_filters[body_type]": "any",
        "m_filters[race]": "any",
        "m_filters[hair_color]": "any",
    }

    try:
        # Without a timeout an unresponsive server would block forever.
        result = client.post(
            "https://www.scoreland.com/search-es/", data=payload, timeout=10
        )
        result.raise_for_status()
    except requests.RequestException as err:
        log.error(f"Search request for '{query}' failed: {err}")
        return []

    # lxml refuses to parse an empty document, so treat it as "no results".
    if not result.content.strip():
        log.warning(f"No performers found for '{query}'")
        return []

    tree = html.fromstring(result.content)
    performers = [p for x in tree.find_class("model") if (p := map_performer(x))]
    if not performers:
        log.warning(f"No performers found for '{query}'")
    return performers
def main():
    """Command-line entry point of the scraper.

    Parses the ``search`` sub-command (with an optional ``name`` argument),
    then, when stdin is not a terminal, merges a JSON object read from stdin
    into the parsed arguments. For the ``search`` operation the matches from
    ``performer_query`` are printed to stdout as JSON.

    Exit status:
        0 after a search has been printed,
        1 when no arguments were given, stdin did not contain a JSON object,
          or no search name was provided,
        2 when no known operation was handled.
    """
    parser = argparse.ArgumentParser("ScoreGroup Scraper", argument_default="")
    subparsers = parser.add_subparsers(
        dest="operation", help="Operation to perform", required=True
    )
    subparsers.add_parser("search", help="Search for performers").add_argument(
        "name", nargs="?", help="Name to search for"
    )

    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)

    args = parser.parse_args()
    log.debug(f"Arguments from commandline: {args}")

    # Script is being piped into, probably by Stash
    if not sys.stdin.isatty():
        try:
            frag = json.load(sys.stdin)
        except json.decoder.JSONDecodeError:
            log.error("Received invalid JSON from stdin")
            sys.exit(1)
        # Valid JSON that is not an object (null, a number, a string, ...)
        # cannot be merged into the argument namespace; reject it explicitly
        # instead of crashing inside dict.update().
        if not isinstance(frag, dict):
            log.error("Expected a JSON object from stdin")
            sys.exit(1)
        vars(args).update(frag)
        log.debug(f"With arguments from stdin: {args}")

    if args.operation == "search":
        name: str = args.name
        if not name:
            log.error("No query provided")
            sys.exit(1)
        log.debug(f"Searching for '{name}'")
        matches = performer_query(name)
        print(json.dumps(matches))
        sys.exit(0)

    # Just in case the above if statement doesn't trigger somehow
    # Something has gone quite wrong should this ever get hit
    log.error("An error has occured")
    sys.exit(2)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,249 @@
name: TheScoreGroup
sceneByURL:
- action: scrapeXPath
url: &urls
- 18eighteen.com
- 40somethingmag.com
- 50plusmilfs.com
- 60plusmilfs.com
- bigboobspov.com
- bigtitangelawhite.com
- bigtithitomi.com
- bigtithooker.com
- bigtitterrynova.com
- bigtitvenera.com
- bonedathome.com
- bootyliciousmag.com
- bustyangelique.com
- bustyarianna.com
- bustydanniashe.com
- bustydustystash.com
- bustyinescudna.com
- bustykellykay.com
- bustykerrymarie.com
- bustylornamorga.com
- bustymerilyn.com
- bustyoldsluts.com
- chloesworld.com
- christymarks.com
- cock4stepmom.com
- creampieforgranny.com
- crystalgunnsworld.com
- daylenerio.com
- desiraesworld.com
- evanottyvideos.com
- feedherfuckher.com
- flatandfuckedmilfs.com
- homealonemilfs.com
- karinahart.com
- legsex.com
- mickybells.com
- milftugs.com
- mommystoytime.com
- naughtymag.com
- pickinguppussy.com
- pornmegaload.com
- reneerossvideos.com
- scoreclassics.com
- scoreland.com
- scoreland2.com
- scorevideos.com
- sharizelvideos.com
- stacyvandenbergboobs.com
- tawny-peaks.com
- titsandtugs.com
- valoryirene.com
- xlgirls.com
- yourwifemymeat.com
scraper: sceneScraper
galleryByURL:
- action: scrapeXPath
url: *urls
scraper: galleryScraper
performerByURL:
- action: scrapeXPath
url: *urls
scraper: performerScraper
performerByName:
action: script
script:
- python
- TheScoreGroup.py
- search
xPathScrapers:
sceneScraper:
common:
$url: //link[@rel="canonical"]/@href
$videopage: //section[@id="videos_page-page" or @id="mixed_page-page"]
$stat: //div[contains(concat(' ',normalize-space(@class),' '),' mb-3 ')]
scene:
Title: $videopage//h1
# The original studio can be determined from the CDN links (e.g. <source src="//cdn77.scoreuniverse.com/naughtymag/scenes...">); this helps set the studio for Pornmegaload scenes, as nothing is released directly by PML.
Studio:
Name:
selector: ($videopage//video/source/@src)[1]
postProcess:
- replace:
- regex: .*\.com/(.+?)\/(video|scene).*
with: $1
- map: &studioMap
18eighteen: 18 Eighteen
40somethingmag: 40 Something Mag
50plusmilfs: 50 Plus MILFs
60plusmilfs: 60 Plus MILFs
bigboobspov: Big Boobs POV
bigtitangelawhite: Big Tit Angela White
bigtithitomi: Big Tit Hitomi
bigtithooker: Big Tit Hooker
bigtitterrynova: Big Tit Terry Nova
bigtitvenera: Big Tit Venera
bonedathome: Boned At Home
bootyliciousmag: Bootylicious Mag
bustyangelique: Busty Angelique
bustyarianna: Busty Arianna
bustydanniashe: Busty Danni Ashe
bustydustystash: busty Dusty Stash
bustyinescudna: Busty Ines Cudna
bustykellykay: Busty Kelly Kay
bustykerrymarie: Busty Kerry Marie
bustylornamorga: Busty Lorna Morga
bustymerilyn: Busty Merilyn
bustyoldsluts: Busty Old Sluts
chloesworld: Chloe's World
christymarks: Christy Marks
cock4stepmom: Cock 4 Stepmom
creampieforgranny: Creampie for Granny
crystalgunnsworld: Crystal Gunns World
daylenerio: Daylene Rio
desiraesworld: Desiraes World
evanottyvideos: Eva Notty Videos
feedherfuckher: Feed Her Fuck Her
flatandfuckedmilfs: Flat and Fucked MILFs
homealonemilfs: Home Alone MILFs
karinahart: Karina Hart
legsex: Leg Sex
mickybells: Micky Bells
milftugs: MILF Tugs
mommystoytime: Mommy's Toy Time
naughtymag: Naughty Mag
pickinguppussy: Picking Up Pussy
pornmegaload: Porn Mega Load
reneerossvideos: Renee Ross Video
scoreclassics: Score Classics
# Scoreland2 needs to come before scoreland because it's more specific
scoreland2: Scoreland2
scoreland: Scoreland
scorevideos: Score Videos
sharizelvideos: Sha Rizel Videos
stacyvandenbergboobs: Stacy Vandenberg Boobs
tawny-peaks: Tawny Peaks
titsandtugs: Tits And Tugs
valoryirene: Valory Irene
xlgirls: XL Girls
yourwifemymeat: Your Wife My Meat
Date: &dateAttr
selector: $videopage$stat//span[contains(.,"Date:")]/following-sibling::span
postProcess:
- replace:
- regex: ..,
with:
- parseDate: January 2 2006
Details: &details
selector: $videopage//div[@class="p-desc p-3"]/text()
postProcess:
- replace:
- regex: Read More »
with:
# Attempt to fix erroneous line breaks where HTML tags existed
- regex: \n\n([0-9a-zA-Z\.]+)\n\n
with: " $1 "
concat: "\n\n"
Tags:
Name: $videopage//a[contains(@href, "videos-tag") or contains(@href, "scenes-tag")]
Performers: &performersAttr
Name: $videopage//span[contains(.,"Featuring:")]/following-sibling::span/a
Image:
# This selector scrapes the canonical scene page cover image
selector: //script[contains(text(), "poster")]
postProcess:
- replace:
- regex: ^.+poster.+'(.+jpg)'.+$
with: $1
- regex: ^//
with: https://
# This Selector scrapes the image posted on social media sites
# selector: //meta[@itemprop="image"]/@content
# Enable this post process if you want better image quality but sometimes it can fail
# postProcess:
# - replace:
# - regex: _lg
# with: _x_800
URL: &urlAttr
selector: $url
Code: &codeAttr
selector: $url
postProcess:
- replace:
- regex: .*\/(\d+)\/?$
with: $1
galleryScraper:
common:
$photopage: //section[@id='photos_page-page']
gallery:
Title: //h1
Studio:
Name:
selector: //link[@rel="canonical"]/@href
postProcess:
- replace:
- regex: ^(https://)?.+?([^\.]+)\.com/.*
with: $2
- map: *studioMap
Date:
selector: //div[span[@class="label" and contains(.,"Date")]]/span[@class="value"]/text()
postProcess:
- replace:
- regex: (\d+)(st|nd|rd|th)
with: $1
- parseDate: January 2, 2006
Details:
selector: $photopage//div[contains(@class, 'p-desc')]/text()
concat: "\n"
Tags:
Name: //h3/following-sibling::a[not(contains(@class,"btn-1"))]/text()
Performers:
Name: //div[span[@class="label" and contains(.,"Featuring")]]/span[@class="value"]//a/text()
performerScraper:
performer:
Name: //h1[@class="model-title"]
Gender:
fixed: Female
URL: //meta[@property="og:url"]/@content
Ethnicity:
selector: //span[text()="Ethnicity:"]/following-sibling::span
postProcess:
- map:
White: Caucasian
HairColor: //span[text()="Hair Color:"]/following-sibling::span
Height:
selector: //span[text()="Height:"]/following-sibling::span
postProcess:
- feetToCm: true
Weight:
selector: //span[text()="Weight:"]/following-sibling::span
postProcess:
- replace:
- regex: (\d+).*
with: $1
- lbToKg: true
Measurements:
selector: //span[text()="Bra Size:"]/following-sibling::span|//span[text()="Measurements:"]/following-sibling::span
concat: "-"
postProcess:
- replace:
- regex: (\d+[a-zA-Z]{1,3})-\d+(-\d+-\d+)
with: $1$2
Image: //section[@id="model-page"]//img[@class="lazyload"]/@src
# Last Updated April 08, 2024

View File

@@ -0,0 +1,10 @@
id: TheScoreGroup
name: TheScoreGroup
metadata: {}
version: 2cd09ad
date: "2024-04-08 00:18:22"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- TheScoreGroup.yml
- TheScoreGroup.py