This commit is contained in:
Christoph Califice
2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions

View File

@@ -0,0 +1,70 @@
import json
import os
import re
import requests
import sys
import py_common.log as log
# lxml is a third-party dependency; fail with an actionable message instead of
# a raw traceback when it is not installed.
try:
    from lxml import html
except ModuleNotFoundError:
    log.error("You need to install the lxml module. (https://lxml.de/installation.html#installation)")
    log.error("If you have pip (normally installed with python), run this command in a terminal (cmd): python -m pip install lxml")
    # Exit with a non-zero status so the caller can tell the scrape failed;
    # a bare sys.exit() would report success (status 0).
    sys.exit(1)
# Unit conversion factors used to turn the profile's pounds and inches into
# the kilograms and centimetres emitted in the scraped performer.
kgs_per_lb = 0.45359237
cms_per_in = 2.54
# Case-insensitive pattern for ATK file names, laid out as
#   <model_id>ATK_<movie_id><scene>_<resolution>[.<extension>]
# where model_id is three letters followed by three digits, movie_id is six
# digits, scene is three digits, and resolution/extension are word characters.
# The extension (with its leading dot) is optional.
filename_pattern = re.compile(r"(?P<model_id>[a-z]{3}\d{3})ATK_(?P<movie_id>\d{6})(?P<scene>\d{3})_(?P<resolution>\w+)(?:\.(?P<extension>\w+))?", re.IGNORECASE)
def getSceneByFilename(filename: str) -> dict:
    """Scrape scene metadata for an ATK file name from the model's tour page.

    The model and movie ids are parsed out of *filename*, the model page is
    fetched, and the performer profile plus the movie entry found on that
    page are converted into a scene dict.

    Args:
        filename: File name that matches ``filename_pattern`` from its start.

    Returns:
        A dict with ``studio``, ``performers``, ``title``, ``details``,
        ``url`` and ``image`` keys. ``performers`` holds a single performer
        dict; profile attributes the page does not list are omitted from it.

    Raises:
        ValueError: If *filename* does not match ``filename_pattern``.
        IndexError: If the performer name, or the movie's title, description,
            link or image, cannot be found on the page.
        requests.RequestException: If the page cannot be fetched, including
            when the request times out.
    """
    # Parse filename. Named groups keep this independent of the number and
    # order of groups in the pattern.
    filename_match = filename_pattern.match(filename)
    if filename_match is None:
        raise ValueError(f"Filename does not match ATKGirlfriends pattern: {filename!r}")
    model_id = filename_match.group("model_id")
    movie_id = filename_match.group("movie_id")

    # Fetch model page. A timeout is required: without one, requests waits
    # indefinitely on an unresponsive server.
    request_timeout = 30  # seconds
    session_cookies = dict(start_session_galleria="stash")
    model_url = f"https://www.atkgirlfriends.com/tour/model/{model_id}"
    log.debug(f"Fetching {model_url} ({movie_id})")
    response = requests.get(model_url, cookies=session_cookies, timeout=request_timeout)
    if response.url.startswith("https://www.atkgirlfriends.com?nats"):
        # Refetch page on cookie failure
        response = requests.get(model_url, cookies=session_cookies, timeout=request_timeout)

    tree = html.fromstring(response.text)
    model_profile_wrap_xpath = '//div[contains(@class, "model-profile-wrap")]'

    def profile_field(label):
        """Return the stripped text that follows the bold *label*, or None if absent."""
        texts = tree.xpath(
            f'{model_profile_wrap_xpath}/b[contains(text(), "{label}")]/following-sibling::text()'
        )
        return texts[0].strip() if texts else None

    # Build performer. Only the name is mandatory; every other attribute is
    # added when the profile lists it, so one missing field no longer aborts
    # the whole scrape.
    performer = dict(Gender="female")
    performer["name"] = tree.xpath('//h1[contains(@class, "page-title")]')[0].text
    performer["url"] = f"{model_url}/1/atk-girlfriends-{performer['name'].replace(' ', '-')}"

    ethnicity = profile_field("Ethnicity")
    if ethnicity is not None:
        performer["ethnicity"] = ethnicity.capitalize()

    hair_color = profile_field("Hair Color")
    if hair_color is not None:
        performer["hair_color"] = hair_color.capitalize()

    # Height is written as feet and inches separated by a quote character;
    # convert to whole centimetres.
    height_text = profile_field("Height")
    height_match = re.search(r"(\d+)[\"'](\d+)", height_text) if height_text else None
    if height_match:
        height_ft_str, height_ins_str = height_match.groups()
        height_ins = float(height_ft_str) * 12 + float(height_ins_str)
        performer["height"] = str(int(height_ins * cms_per_in))

    # Weight: take the first run of digits as pounds and convert to whole kilograms.
    weight_text = profile_field("Weight")
    weight_match = re.search(r"\d+", weight_text) if weight_text else None
    if weight_match:
        weight_lbs = float(weight_match.group())
        performer["weight"] = str(int(weight_lbs * kgs_per_lb))

    measurements = profile_field("Bust Size")
    if measurements is not None:
        performer["measurements"] = measurements

    profile_images = tree.xpath(f'{model_profile_wrap_xpath}/img/@src')
    if profile_images:
        performer["image"] = profile_images[0]

    # Build scene. The movie entry is located through its thumbnail, whose
    # src contains "/<model_id>/<movie_id>"; the wrapper is four levels up.
    scene = dict(studio=dict(name="ATK Girlfriends"), performers=[performer])
    movie_wrap_xpath = f'//img[contains(@src, "/{model_id}/{movie_id}")]/../../../..'
    scene["title"] = tree.xpath(f'{movie_wrap_xpath}//h1')[0].text.strip()
    scene["details"] = tree.xpath(f'{movie_wrap_xpath}//b[contains(text(), "Description")]/following-sibling::text()')[0].strip()
    movie_url_relative = tree.xpath(f'{movie_wrap_xpath}//a/@href')[0]
    scene["url"] = f'https://www.atkgirlfriends.com{movie_url_relative}'
    scene["image"] = tree.xpath(f'{movie_wrap_xpath}//img/@src')[0]
    return scene
def main():
    """Read the scrape request from stdin and print the scraped scene as JSON.

    The whole of stdin is searched for an ATK file name. When one is found
    the scene built by ``getSceneByFilename`` is printed as JSON; otherwise
    an empty JSON object is printed.
    """
    # Named to avoid shadowing the builtin ``input``.
    stdin_text = sys.stdin.read()
    filename_match = filename_pattern.search(stdin_text)
    if filename_match is None:
        log.debug("Filename does not match ATKGirlfriends pattern")
        print(r"{}")
        return
    scene = getSceneByFilename(filename_match.group())
    print(json.dumps(scene))


# Run only when executed as a script, so importing the module has no
# stdin or network side effects.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,90 @@
# Scraper definition for ATK Girlfriends: scenes can be looked up from a
# fragment (via the Python script) or by URL, performers by URL.
name: ATK Girlfriends
# requires: py_common
# Fragment lookups are delegated to the companion script.
sceneByFragment:
  action: script
  script:
    - python
    - ATKGirlfriends.py
performerByURL:
  - action: scrapeXPath
    url:
      # Trying to scrape without the www. prefix will result in a redirect loop
      - www.atkgirlfriends.com/tour/model/
    scraper: performerScraper
sceneByURL:
  - action: scrapeXPath
    url:
      # Trying to scrape without the www. prefix will result in a redirect loop
      - www.atkgirlfriends.com/tour/movie/
    scraper: sceneScraper
xPathScrapers:
  performerScraper:
    common:
      # Profile container; anchored so sceneScraper can reuse the same selector.
      $modelWrap: &modelWrap //div[contains(@class, "model-profile-wrap")]
    performer:
      Name: //h1[contains(@class, "page-title")]
      Gender:
        fixed: female
      # Each profile attribute is the text node following its bold label.
      Ethnicity:
        selector: $modelWrap/b[contains(text(), "Ethnicity")]/following-sibling::text()
      HairColor:
        selector: $modelWrap/b[contains(text(), "Hair Color")]/following-sibling::text()
        postProcess:
          # Normalise the site's lower-case values (note "blond" -> "Blonde").
          - map:
              black: Black
              blond: Blonde
              brown: Brown
              red: Red
              white: White
      Height:
        selector: $modelWrap/b[contains(text(), "Height")]/following-sibling::text()
        postProcess:
          - feetToCm: true
      Weight:
        selector: $modelWrap/b[contains(text(), "Weight")]/following-sibling::text()
        postProcess:
          # Drop everything after the first run of digits before converting.
          - replace:
              - regex: (\d+).*
                with: $1
          - lbToKg: true
      Measurements:
        selector: $modelWrap/b[contains(text(), "Bust Size")]/following-sibling::text()
      Image:
        selector: $modelWrap/img/@src
  sceneScraper:
    common:
      $movieWrap: //div[contains(@class, "movie-wrap")]
      $modelWrap: *modelWrap
    scene:
      Title: //title
      Details: $movieWrap/b[contains(text(), "Description")]/following-sibling::text()
      Image:
        # Either the video poster, or the player div's inline style from which
        # the background-image URL is extracted.
        selector: //video/@poster | //div[@class="flowplayer minimalist is-splash"]/@style
        postProcess:
          - replace:
              - regex: background-image:url\('(.*)'\);
                with: $1
      Studio:
        Name:
          fixed: ATK Girlfriends
        URL:
          fixed: https://www.atkgirlfriends.com/
      Tags:
        Name:
          selector: $movieWrap/b[contains(text(), "Tags")]/following-sibling::text()
          postProcess:
            # Remove whitespace around the commas so the split yields clean tag names.
            - replace:
                - regex: \s*,\s*
                  with: ","
          split: ","
      Performers:
        Name: $modelWrap/text()[1]
driver:
  # Session cookie sent with requests; the Python script sends the same
  # cookie name and value.
  cookies:
    - CookieURL: https://www.atkgirlfriends.com
      Cookies:
        - Name: start_session_galleria
          Domain: www.atkgirlfriends.com
          Value: stash # Rotate this value if the scraper is blocked. The first request with the new value should fail.
          Path: /
# Last Updated March 14, 2024

View File

@@ -0,0 +1,10 @@
# Package manifest for the ATK Girlfriends scraper: package id and display
# name, version and date, source index, and the files that make up the package.
id: ATKGirlfriends
name: ATK Girlfriends
metadata: {}
version: 71e0b03
date: "2024-03-15 01:14:32"
# NOTE(review): ATKGirlfriends.py imports py_common.log and ATKGirlfriends.yml
# carries "# requires: py_common", yet no dependency is listed here - confirm
# whether py_common should appear in this list.
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- ATKGirlfriends.yml
- ATKGirlfriends.py