stash
This commit is contained in:
@@ -0,0 +1,70 @@
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import requests
|
||||
import sys
|
||||
|
||||
import py_common.log as log
|
||||
|
||||
# lxml is a third-party dependency; fail with installation instructions
# instead of a bare traceback when it is missing.
try:
    from lxml import html
except ModuleNotFoundError:
    log.error("You need to install the lxml module. (https://lxml.de/installation.html#installation)")
    log.error("If you have pip (normally installed with python), run this command in a terminal (cmd): python -m pip install lxml")
    # Exit non-zero: a bare sys.exit() reports success (status 0) even though
    # the scraper cannot run.
    sys.exit(1)
|
||||
|
||||
# Unit conversion factors used to turn the site's imperial profile values into
# metric strings.
kgs_per_lb = 0.45359237  # kilograms per pound
cms_per_in = 2.54  # centimetres per inch

# Matches (case-insensitively) filenames of the form
#   <3 letters><3 digits>ATK_<6 digits><3 digits>_<resolution>[.<extension>]
# and exposes the parts as the named groups model_id, movie_id, scene,
# resolution and (optionally) extension.
filename_pattern = re.compile(
    r"(?P<model_id>[a-z]{3}\d{3})ATK_"
    r"(?P<movie_id>\d{6})(?P<scene>\d{3})_"
    r"(?P<resolution>\w+)"
    r"(?:\.(?P<extension>\w+))?",
    re.IGNORECASE,
)
|
||||
|
||||
# Seconds to wait for the site; without a timeout requests can block forever.
_REQUEST_TIMEOUT = 30
_SESSION_COOKIES = dict(start_session_galleria="stash")
_MODEL_PROFILE_WRAP_XPATH = '//div[contains(@class, "model-profile-wrap")]'
_HEIGHT_FT_INS_PATTERN = re.compile(r"(\d+)[\"'](\d+)")
_FIRST_NUMBER_PATTERN = re.compile(r"\d+")


def _first(results):
    """Return the first item of an XPath result list, or None when it is empty."""
    return results[0] if results else None


def _fetch_model_page(model_url):
    """Download *model_url*, retrying once if the site redirects to its ``?nats`` URL."""
    response = requests.get(model_url, cookies=_SESSION_COOKIES, timeout=_REQUEST_TIMEOUT)
    if response.url.startswith("https://www.atkgirlfriends.com?nats"):
        # Refetch page on cookie failure
        response = requests.get(model_url, cookies=_SESSION_COOKIES, timeout=_REQUEST_TIMEOUT)
    return response


def _profile_field(tree, label):
    """Return the stripped text following the bold *label* in the model profile, or None."""
    value = _first(
        tree.xpath(
            f'{_MODEL_PROFILE_WRAP_XPATH}/b[contains(text(), "{label}")]/following-sibling::text()'
        )
    )
    return None if value is None else value.strip()


def _build_performer(tree, model_url):
    """Build the performer dict from the parsed model page, omitting fields that are absent."""
    performer = dict(Gender="female")

    heading = _first(tree.xpath('//h1[contains(@class, "page-title")]'))
    name = heading.text if heading is not None else None
    if name:
        performer["name"] = name
        performer["url"] = f"{model_url}/1/atk-girlfriends-{name.replace(' ', '-')}"

    ethnicity = _profile_field(tree, "Ethnicity")
    if ethnicity is not None:
        performer["ethnicity"] = ethnicity.capitalize()

    hair_color = _profile_field(tree, "Hair Color")
    if hair_color is not None:
        performer["hair_color"] = hair_color.capitalize()

    # Height is published as feet and inches; convert to whole centimetres.
    height_match = _HEIGHT_FT_INS_PATTERN.search(_profile_field(tree, "Height") or "")
    if height_match:
        height_ft, height_ins = (float(part) for part in height_match.groups())
        performer["height"] = str(int((height_ft * 12 + height_ins) * cms_per_in))

    # Weight is published in pounds; convert the first number to whole kilograms.
    weight_match = _FIRST_NUMBER_PATTERN.search(_profile_field(tree, "Weight") or "")
    if weight_match:
        performer["weight"] = str(int(float(weight_match.group()) * kgs_per_lb))

    measurements = _profile_field(tree, "Bust Size")
    if measurements is not None:
        performer["measurements"] = measurements

    image = _first(tree.xpath(f'{_MODEL_PROFILE_WRAP_XPATH}/img/@src'))
    if image is not None:
        performer["image"] = image

    return performer


def getSceneByFilename(filename):
    """Scrape scene and performer metadata for an ATK Girlfriends video filename.

    The model id and movie id are parsed from *filename* with
    ``filename_pattern``. The model's page is fetched, the performer is built
    from its profile section, and the scene fields are read from the page
    element containing an image whose ``src`` includes
    ``/<model_id>/<movie_id>``.

    Args:
        filename: A filename that ``filename_pattern`` matches from its start.

    Returns:
        A dict with ``studio`` and ``performers`` and, when present on the
        page, ``title``, ``details``, ``url`` and ``image``.

    Raises:
        ValueError: If *filename* does not match ``filename_pattern``.
        requests.RequestException: If the model page cannot be fetched.
    """
    # Parse filename
    filename_match = filename_pattern.match(filename)
    if filename_match is None:
        raise ValueError(f"Filename does not match ATKGirlfriends pattern: {filename!r}")
    model_id = filename_match.group("model_id")
    movie_id = filename_match.group("movie_id")

    # Fetch model page
    model_url = f"https://www.atkgirlfriends.com/tour/model/{model_id}"
    log.debug(f"Fetching {model_url} ({movie_id})")
    tree = html.fromstring(_fetch_model_page(model_url).text)

    # Build scene (the performer comes from the same page)
    scene = dict(studio=dict(name="ATK Girlfriends"), performers=[_build_performer(tree, model_url)])
    movie_wrap_xpath = f'//img[contains(@src, "/{model_id}/{movie_id}")]/../../../..'

    title_node = _first(tree.xpath(f'{movie_wrap_xpath}//h1'))
    if title_node is not None and title_node.text is not None:
        scene["title"] = title_node.text.strip()

    details = _first(
        tree.xpath(f'{movie_wrap_xpath}//b[contains(text(), "Description")]/following-sibling::text()')
    )
    if details is not None:
        scene["details"] = details.strip()

    movie_url_relative = _first(tree.xpath(f'{movie_wrap_xpath}//a/@href'))
    if movie_url_relative is not None:
        scene["url"] = f'https://www.atkgirlfriends.com{movie_url_relative}'

    scene_image = _first(tree.xpath(f'{movie_wrap_xpath}//img/@src'))
    if scene_image is not None:
        scene["image"] = scene_image

    return scene
|
||||
|
||||
# Script entry: read the fragment text from stdin, look for an ATK Girlfriends
# filename anywhere in it, and print the scraped scene as JSON (or an empty
# JSON object when no filename is found).
fragment_text = sys.stdin.read()  # renamed from `input`, which shadowed the builtin
match = filename_pattern.search(fragment_text)
if match:
    scene = getSceneByFilename(match.group())
    output = json.dumps(scene)
    print(output)
else:
    log.debug("Filename does not match ATKGirlfriends pattern")
    print(r"{}")
|
||||
@@ -0,0 +1,90 @@
|
||||
name: ATK Girlfriends
# requires: py_common
sceneByFragment:
  action: script
  script:
    - python
    - ATKGirlfriends.py
performerByURL:
  - action: scrapeXPath
    url:
      # Trying to scrape without the www. prefix will result in a redirect loop
      - www.atkgirlfriends.com/tour/model/
    scraper: performerScraper
sceneByURL:
  - action: scrapeXPath
    url:
      # Trying to scrape without the www. prefix will result in a redirect loop
      - www.atkgirlfriends.com/tour/movie/
    scraper: sceneScraper
xPathScrapers:
  performerScraper:
    common:
      $modelWrap: &modelWrap //div[contains(@class, "model-profile-wrap")]
    performer:
      Name: //h1[contains(@class, "page-title")]
      Gender:
        fixed: female
      Ethnicity:
        selector: $modelWrap/b[contains(text(), "Ethnicity")]/following-sibling::text()
      HairColor:
        selector: $modelWrap/b[contains(text(), "Hair Color")]/following-sibling::text()
        postProcess:
          - map:
              black: Black
              blond: Blonde
              brown: Brown
              red: Red
              white: White
      Height:
        selector: $modelWrap/b[contains(text(), "Height")]/following-sibling::text()
        postProcess:
          - feetToCm: true
      Weight:
        selector: $modelWrap/b[contains(text(), "Weight")]/following-sibling::text()
        postProcess:
          - replace:
              - regex: (\d+).*
                with: $1
          - lbToKg: true
      Measurements:
        selector: $modelWrap/b[contains(text(), "Bust Size")]/following-sibling::text()
      Image:
        selector: $modelWrap/img/@src
  sceneScraper:
    common:
      $movieWrap: //div[contains(@class, "movie-wrap")]
      $modelWrap: *modelWrap
    scene:
      Title: //title
      Details: $movieWrap/b[contains(text(), "Description")]/following-sibling::text()
      Image:
        selector: //video/@poster | //div[@class="flowplayer minimalist is-splash"]/@style
        postProcess:
          - replace:
              - regex: background-image:url\('(.*)'\);
                with: $1
      Studio:
        Name:
          fixed: ATK Girlfriends
        URL:
          fixed: https://www.atkgirlfriends.com/
      Tags:
        Name:
          selector: $movieWrap/b[contains(text(), "Tags")]/following-sibling::text()
          postProcess:
            - replace:
                - regex: \s*,\s*
                  with: ","
          split: ","
      Performers:
        Name: $modelWrap/text()[1]
driver:
  cookies:
    - CookieURL: https://www.atkgirlfriends.com
      Cookies:
        - Name: start_session_galleria
          Domain: www.atkgirlfriends.com
          Value: stash # Rotate this value if the scraper is blocked. The first request with the new value should fail.
          Path: /
# Last Updated March 14, 2024
|
||||
10
stash/config/scrapers/community/ATKGirlfriends/manifest
Executable file
10
stash/config/scrapers/community/ATKGirlfriends/manifest
Executable file
@@ -0,0 +1,10 @@
|
||||
id: ATKGirlfriends
name: ATK Girlfriends
metadata: {}
version: 71e0b03
date: "2024-03-15 01:14:32"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
  - ATKGirlfriends.yml
  - ATKGirlfriends.py
|
||||
Reference in New Issue
Block a user