This commit is contained in:
Christoph Califice
2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions

View File

@@ -0,0 +1,62 @@
import json
import sys
import re
from pathlib import Path
# `requests` is the only third-party dependency of this script. When it is
# missing, print installation guidance instead of a bare traceback.
try:
    import requests
except ModuleNotFoundError:
    print("You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", file=sys.stderr)
    print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", file=sys.stderr)
    # Exit with a non-zero status so the caller can tell the run failed;
    # a bare sys.exit() would report success (status 0).
    sys.exit(1)
def debug(t):
    """Write the message ``t`` to standard error, terminated by a newline."""
    line = t + "\n"
    sys.stderr.write(line)
def query_url(query):
    """Look up ``query`` with the AnalVids autocomplete search API.

    Returns the first entry of the ``terms`` list in the JSON response, or
    ``None`` when the request fails, the response is not usable JSON, or the
    response carries no terms.
    """
    try:
        res = requests.get(
            "https://www.analvids.com/api/autocomplete/search",
            # Let requests URL-encode the value instead of pasting it into
            # the URL, so spaces, '&' or '#' in the query cannot corrupt it.
            params={"q": query},
            # Without a timeout an unresponsive server blocks forever.
            timeout=30,
        )
        res.raise_for_status()
        data = res.json()
    except (requests.RequestException, ValueError) as err:
        # ValueError covers JSON decoding errors on older requests versions.
        debug(f"Search request for `{query}` failed: {err}")
        return None

    results = data.get("terms") if isinstance(data, dict) else None
    if not isinstance(results, list) or not results:
        return None
    if len(results) > 1:
        debug("Multiple results. Taking first.")
    return results[0]
def detect_delimiter(title):
    """Return the word separator used in ``title``.

    The candidates are tried in priority order (space, underscore, hyphen,
    dot) and the first one that occurs in ``title`` is returned. When none of
    them occurs, a debug message is written and ``None`` is returned.
    """
    found = next((sep for sep in (" ", "_", "-", ".") if sep in title), None)
    if found is None:
        debug(f"Could not determine delimiter of `{title}`")
    return found
def find_scene_id(title):
    """Extract a scene id such as ``GL123`` from a title or file name.

    A candidate is a delimiter-separated word made of 2-3 word characters
    followed by 3-4 digits that does not start with a digit.

    The title is first treated as a file name: the part after the last dot is
    dropped as the extension and the remainder is split on its detected
    delimiter. Because that also discards genuine title text following a dot
    (e.g. "Ms. Jones GIO1234"), a second pass searches the complete title,
    split on any of the known delimiters, when the first pass finds nothing.

    Returns the first matching word, or ``None`` when there is none.
    """
    def first_id(words):
        # Return the first word that looks like a scene id, else None.
        for word in words:
            if len(word) > 3 and re.match(r'^(\w{2,3}\d{3,4})$', word):
                if not word[0].isdigit() and word[-1].isdigit():
                    return word
        return None

    # Remove file extension
    stem = Path(title).stem.replace("'", "")
    scene_id = first_id(stem.split(detect_delimiter(stem)))
    if scene_id is not None:
        return scene_id

    # Nothing found in the stem: the id may sit after the last dot of a title
    # without a real extension, or be joined to a neighbour by a different
    # delimiter than the detected one. Search the whole title instead.
    return first_id(re.split(r"[ _.-]+", title.replace("'", "")))
# Script entry point. When invoked as `AnalVids.py query`, a scene fragment is
# read as JSON from stdin, the scene id found in its title is looked up on the
# site, and the (possibly enriched) fragment is printed as JSON on stdout.
# The argument count is checked first so that starting the script without
# arguments no longer raises IndexError.
if len(sys.argv) > 1 and sys.argv[1] == "query":
    fragment = json.loads(sys.stdin.read())
    debug(json.dumps(fragment))
    # The fragment may lack a title or carry a null one; treat both as empty.
    title = fragment.get("title") or ""
    scene_id = find_scene_id(title)
    if not scene_id:
        debug(f"Could not determine scene id in title: `{title}`")
    else:
        debug(f"Found scene id: {scene_id}")
        result = query_url(scene_id)
        if result is None:
            debug("No scenes found")
        elif isinstance(result, dict) and result.get("type") == "scene":
            debug(f"Found scene {result['name']}")
            fragment["url"] = result["url"]
            fragment["title"] = result["name"]
        else:
            # The first search hit is something other than a scene; leave the
            # fragment untouched but say why.
            debug("First result is not a scene")
    # Always emit the fragment, enriched or not.
    print(json.dumps(fragment))

View File

@@ -0,0 +1,60 @@
# yaml-language-server: $schema=../validator/scraper.schema.json
# Scraper definition for AnalVids: scenes and performers are scraped from
# their page URLs with XPath, and scene fragments are resolved by a script.
name: "AnalVids"
# Scene pages on either domain are handled by the sceneScraper XPath scraper.
sceneByURL:
- action: scrapeXPath
url:
- analvids.com/watch/
- pissvids.com/watch/
scraper: sceneScraper
# Model pages on either domain are handled by the performerScraper XPath scraper.
performerByURL:
- action: scrapeXPath
url:
- analvids.com/model/
- pissvids.com/model/
scraper: performerScraper
# Fragment lookups are delegated to the companion script, run with the
# arguments listed below (python AnalVids.py query).
sceneByFragment:
action: script
script:
- python
# use python3 instead if needed
- AnalVids.py
- query
xPathScrapers:
sceneScraper:
common:
# Text nodes of the title heading, excluding any text nested inside a <span>.
$title: //h1[contains(@class, "watch__title")]//text()[not(ancestor::span)]
scene:
Title:
selector: $title
concat: " "
Date:
selector: //i[contains(@class, "bi-calendar3")]/text()
postProcess:
# NOTE(review): presumably a Go-style reference layout, i.e. the page date
# is expected as YYYY-MM-DD - confirm against the Stash scraper docs.
- parseDate: 2006-01-02
Details:
# Skip text whose parent is a span carrying the "dots" class.
selector: //div[contains(@class, "text-mob-more")]//text()[not(parent::span[contains(@class, "dots")])]
concat: " "
Code:
selector: $title
postProcess:
- replace:
# The first alternative captures a trailing code of 2-3 capital letters
# followed by digits as $1. The catch-all second alternative matches titles
# without such a code, for which $1 is empty.
- regex: .+?([A-Z]{2,3}\d+)$|(.+)
with: $1
Performers:
# Performer names and URLs both come from the links inside the title heading.
Name: //h1[contains(@class, "watch__title")]//a/text()
URL: //h1[contains(@class, "watch__title")]//a/@href
Studio:
# Studio and tags are both taken from links in the genres list and are told
# apart by their href ("/studios/" versus "/genre/").
Name: //div[contains(@class, "genres-list")]//a[contains(@href, "/studios/")]/text()
URL: //div[contains(@class, "genres-list")]//a[contains(@href, "/studios/")]/@href
Tags:
Name: //div[contains(@class, "genres-list")]//a[contains(@href, "/genre/")]/text()
Image: //video/@data-poster
performerScraper:
performer:
Name: //h1
# Country is read from the link whose href contains "nationality".
Country: //a[contains(@href, "nationality")]
Image: //div[contains(@class, 'model__left')]//img/@src
# Last Updated August 16, 2023

View File

@@ -0,0 +1,10 @@
id: AnalVids
name: AnalVids
metadata: {}
version: fbd81c5
date: "2023-11-22 00:31:17"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AnalVids.yml
- AnalVids.py