This commit is contained in:
Christoph Califice
2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions

View File

@@ -0,0 +1,55 @@
# Scrapes 10Musume scenes through the site's JSON movie-details endpoint.
name: 10Musume
sceneByURL:
  - action: scrapeJson
    url:
      - en.10musume.com/movies/
    scraper: sceneScraper
    queryURL: "https://en.10musume.com/dyn/phpauto/movie_details/movie_id/{url}.json"
    queryURLReplace:
      url:
        # Reduce the scene URL to its "<6 digits>_<2 digits>" movie id.
        # The tail is optional (".*") so a URL without a trailing "/" is
        # still reduced instead of being passed through unchanged.
        - regex: '.+/movies/(\d{6})_(\d{2}).*'
          with: "${1}_${2}"
sceneByFragment:
  action: scrapeJson
  queryURL: "https://en.10musume.com/dyn/phpauto/movie_details/movie_id/{filename}.json"
  scraper: sceneScraper
  queryURLReplace:
    filename:
      # Pull the movie id out of the file name; "_", "." or "-" may separate
      # the two digit groups and is normalised to "_".
      - regex: '.*(\d{6})[_.-](\d{2}).+'
        with: "${1}_${2}"
jsonScrapers:
  sceneScraper:
    scene:
      Title:
        # Query both TitleEn and Title, then concat/split on "|".
        selector: "[TitleEn,Title]"
        concat: "|" # get rid of empty result
        split: "|"
      Details:
        selector: "[DescEn,Desc]"
        concat: "|" # get rid of empty result
        split: "|"
        postProcess:
          - replace:
              # Strip leading/trailing whitespace around the description.
              - regex: ^\s+(.+)\s+$
                with: $1
      Date:
        selector: Release
        postProcess:
          # Quoted so the layout can never be resolved as a YAML timestamp.
          - parseDate: "2006-01-02"
      Image: ThumbHigh
      Performers:
        Name: ActressesEn
      Studio:
        Name:
          fixed: 10Musume
      Tags:
        Name: UCNAMEEn
      URL:
        selector: MovieID
        postProcess:
          - replace:
              # Wrap the bare movie id into a full scene URL.
              - regex: ^
                with: "https://en.10musume.com/movies/"
              - regex: $
                with: "/"
# Last Updated November 09, 2021

View File

@@ -0,0 +1,9 @@
id: 10Musume
name: 10Musume
metadata: {}
version: d99c8c2
date: "2021-11-14 11:30:04"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- 10Musume.yml

View File

@@ -0,0 +1,35 @@
# XPath scraper for 1 Pass For All Sites episode pages.
name: "1 Pass For All Sites"
sceneByURL:
  - action: scrapeXPath
    url:
      - 1passforallsites.com/episode/
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      Studio:
        Name:
          # Text of the link whose href carries a "?site=" query.
          selector: //a[contains(@href,'?site=')]
      Title:
        selector: //title
        postProcess:
          - replace:
              # Keep only what precedes " - 1 ..." in the page title.
              - regex: (^.+) - 1 .+$
                with: $1
      Details: //div[@class="sp-info-txt"]/p/text()
      Performers:
        Name:
          selector: //p[@class="sp-info-name"]/a/text()
      Tags:
        Name:
          selector: //p[@class="niches-list"]/a/text()
      Date:
        selector: //li[contains(text(),"Added:")]
        postProcess:
          - replace:
              # Drop the "Added: " label before parsing the date.
              - regex: "Added\\: (.+)"
                with: $1
          - parseDate: 2 Jan 2006
      Image: //video/@poster
# Last Updated July 12, 2023

View File

@@ -0,0 +1,9 @@
id: 1passforallsites
name: 1 Pass For All Sites
metadata: {}
version: 81ddf2c
date: "2023-07-13 02:50:30"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- 1passforallsites.yml

View File

@@ -0,0 +1,53 @@
# Scrapes 1Pondo scenes through the site's JSON movie-details endpoint.
name: 1Pondo
sceneByURL:
  - action: scrapeJson
    url:
      - en.1pondo.tv/movies/
    scraper: sceneScraper
    queryURL: "https://en.1pondo.tv/dyn/phpauto/movie_details/movie_id/{url}.json"
    queryURLReplace:
      url:
        # Reduce the scene URL to its "<6 digits>_<3 digits>" movie id.
        # The tail is optional (".*") so a URL without a trailing "/" is
        # still reduced instead of being passed through unchanged.
        - regex: '.+/movies/(\d{6})_(\d{3}).*'
          with: "${1}_${2}"
sceneByFragment:
  action: scrapeJson
  queryURL: "https://en.1pondo.tv/dyn/phpauto/movie_details/movie_id/{filename}.json"
  scraper: sceneScraper
  queryURLReplace:
    filename:
      # Pull the movie id out of the file name; "_", "." or "-" may separate
      # the two digit groups and is normalised to "_".
      - regex: '.*(\d{6})[_.-](\d{3}).+'
        with: "${1}_${2}"
jsonScrapers:
  sceneScraper:
    scene:
      Title:
        # Query both TitleEn and Title, then concat/split on "|".
        selector: "[TitleEn,Title]"
        concat: "|" # get rid of empty result
        split: "|"
      Details:
        selector: DescEn
        postProcess:
          - replace:
              # Strip leading/trailing whitespace around the description.
              - regex: ^\s+(.+)\s+$
                with: $1
      Date:
        selector: Release
        postProcess:
          # Quoted so the layout can never be resolved as a YAML timestamp.
          - parseDate: "2006-01-02"
      Image: ThumbHigh
      Performers:
        Name: ActressesEn
      Studio:
        Name:
          fixed: 1Pondo
      Tags:
        Name: UCNAMEEn
      URL:
        selector: MovieID
        postProcess:
          - replace:
              # Wrap the bare movie id into a full scene URL.
              - regex: ^
                with: "https://en.1pondo.tv/movies/"
              - regex: $
                with: "/"
# Last Updated April 30, 2021

View File

@@ -0,0 +1,9 @@
id: 1pondo
name: 1Pondo
metadata: {}
version: 66d4760
date: "2021-07-18 15:48:20"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- 1pondo.yml

View File

@@ -0,0 +1,31 @@
# requires: Algolia
# Every action runs the Algolia script with the "21naturals" site key; the
# name-based actions pass one extra mode argument.
name: "21Naturals"
sceneByURL:
  - action: script
    url:
      - 21naturals.com/en/video
    script:
      - python
      - ../Algolia/Algolia.py
      - 21naturals
sceneByFragment:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - 21naturals
sceneByName:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - 21naturals
    - searchName
sceneByQueryFragment:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - 21naturals
    - validName
# Last Updated March 23, 2022

View File

@@ -0,0 +1,9 @@
id: 21Naturals
name: 21Naturals
metadata: {}
version: 97bec71
date: "2023-11-22 00:53:44"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- 21Naturals.yml

View File

@@ -0,0 +1,61 @@
# XPath scraper for 21Roles / LifeSelector "DisplayPlayer" game pages.
name: "21Roles"
sceneByURL:
- action: scrapeXPath
url:
- 21roles.com/game/DisplayPlayer/gameId/
- lifeselector.com/game/DisplayPlayer/gameId/
scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
Title:
selector: //title/text()
postProcess:
- replace:
# Keep the text found between the two "DisplayPlayer" markers of the title.
- regex: .+(?:DisplayPlayer\s-\s)(.+)(?:\s-\sDisplayPlayer)+
with: $1
Details: //div[@class="info"]/p/text()
Tags:
Name:
selector: //title/text()
postProcess:
- replace:
# Turn the page title into a site search URL; the subScraper below then
# reads the label links from that search page.
- regex: .+(?:DisplayPlayer\s-\s)(.+)(?:\s-\sDisplayPlayer)+
with: https://21roles.com/site/search/keyword/$1
- regex: (.+)(?:\s-\sInteractive).+
with: https://lifeselector.com/site/search/keyword/$1
- subScraper:
selector: //div[@class="details"]/div[contains(.,'Labels')]//a/text()
# NOTE(review): presumably concat belongs to the subScraper and split to
# Name - confirm against the indented original.
concat: ","
split: ","
Performers:
Name:
selector: //div[@class="modelBlock"]/div[@class="description"]/h1/a/text()
postProcess:
- replace:
# Rewrites a value ending in "/<digits>" into the game's cast-page URL,
# which the subScraper below then queries.
- regex: .+(?:\/)(\d+)+
with: https://21roles.com/game/DisplayPlayer/gameId/$1/view/cast
- subScraper:
selector: //div[@class="content"]//h1/a/text()
concat: ","
split: ","
Image:
selector: //div[@class="signup-right-col"]//input[@id="requestUri"]/@value
postProcess:
- replace:
# Build the poster URL from the numeric id at the end of the request URI.
- regex: .+(?:\/)(\d+)+
with: https://i.c7cdn.com/generator/games/$1/images/poster/1_size1600.jpg
Studio:
Name:
selector: //meta[@property='og:site_name']/@content
postProcess:
# Map the og:site_name value to the studio display name.
- map:
21roles.com: '21Roles'
LifeSelector: 'LifeSelector'
# Driver is used only to grab tags, if you are unable to use CDP and don't mind
# losing the tags, comment out or remove the driver lines
driver:
useCDP: true
# Last Updated November 08, 2020

View File

@@ -0,0 +1,9 @@
id: 21Roles
name: 21Roles
metadata: {}
version: 66d4760
date: "2021-07-18 15:48:20"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- 21Roles.yml

View File

@@ -0,0 +1,31 @@
# requires: Algolia
# Every action runs the Algolia script with the "21sextreme" site key; the
# name-based actions pass one extra mode argument.
name: "21Sextreme"
sceneByURL:
  - action: script
    url:
      - 21sextreme.com/en/video
    script:
      - python
      - ../Algolia/Algolia.py
      - 21sextreme
sceneByFragment:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - 21sextreme
sceneByName:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - 21sextreme
    - searchName
sceneByQueryFragment:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - 21sextreme
    - validName
# Last Updated March 23, 2022

View File

@@ -0,0 +1,9 @@
id: 21Sextreme
name: 21Sextreme
metadata: {}
version: 97bec71
date: "2023-11-22 00:53:44"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- 21Sextreme.yml

View File

@@ -0,0 +1,40 @@
# requires: Algolia
# Every action runs the Algolia script with the "21sextury" site key; the
# name-based and gallery actions pass one extra mode argument.
name: "21Sextury"
sceneByURL:
  - action: script
    url:
      - 21sextury.com/en/video
    script:
      - python
      - ../Algolia/Algolia.py
      - 21sextury
sceneByFragment:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - 21sextury
sceneByName:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - 21sextury
    - searchName
sceneByQueryFragment:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - 21sextury
    - validName
galleryByURL:
  - action: script
    url:
      - 21sextury.com/en/photo/
    script:
      - python
      - ../Algolia/Algolia.py
      - 21sextury
      - gallery
# Last Updated December 22, 2022

View File

@@ -0,0 +1,9 @@
id: 21Sextury
name: 21Sextury
metadata: {}
version: 97bec71
date: "2023-11-22 00:53:44"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- 21Sextury.yml

View File

@@ -0,0 +1,49 @@
# requires: Algolia
# Every action runs the Algolia script with the "3rddegreefilms" site key;
# the name-based, movie and gallery actions pass one extra mode argument.
name: "3rdDegreeFilms"
sceneByURL:
  - action: script
    url:
      # No "www." prefix, matching the movie and gallery patterns below, so
      # scene URLs are accepted with or without the subdomain.
      - 3rddegreefilms.com/en/video
    script:
      - python
      - ../Algolia/Algolia.py
      - 3rddegreefilms
sceneByFragment:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - 3rddegreefilms
sceneByName:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - 3rddegreefilms
    - searchName
sceneByQueryFragment:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - 3rddegreefilms
    - validName
movieByURL:
  - action: script
    url:
      - 3rddegreefilms.com/en/movie
    script:
      - python
      - ../Algolia/Algolia.py
      - 3rddegreefilms
      - movie
galleryByURL:
  - action: script
    url:
      - 3rddegreefilms.com/en/photo/
    script:
      - python
      - ../Algolia/Algolia.py
      - 3rddegreefilms
      - gallery
# Last Updated March 13, 2024

View File

@@ -0,0 +1,9 @@
id: 3rdDegreeFilms
name: 3rdDegreeFilms
metadata: {}
version: c616a0d
date: "2024-03-14 00:33:44"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- 3rdDegreeFilms.yml

View File

@@ -0,0 +1,54 @@
# XPath scraper for 5Kteens / 5Kporn episode pages.
name: 5Kteens
sceneByURL:
  - action: scrapeXPath
    url:
      - 5kteens.com/episodes
      - 5kporn.com/episodes
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      Studio:
        Name:
          # The class of the "color-*" span identifies the site.
          selector: //span[starts-with(@class, "color-")]/@class
          postProcess:
            - map:
                color-5KT: 5Kteens
                color-5KP: 5Kporn
      Title: //p[@class="trailer-title"]
      Details: //p[@class=""]
      Performers:
        Name:
          selector: //h5[text()[contains(.,'Starring')]]
          postProcess:
            - replace:
                # Drop the "Starring: " label.
                - regex: Starring:\s([\w\s]+)
                  with: $1
      Date:
        selector: //h5[text()[contains(.,'Published')]]
        postProcess:
          - replace:
              # Drop the "Published: " label, keeping the YYYY-MM-DD date.
              - regex: Published\:\s(\d{4}-\d{2}-\d{2})
                with: $1
      Image:
        selector: //img[@class="trailer-poster"]/@src | //script[contains(., "videojs(")]
        postProcess:
          - replace:
              # When the match is the videojs script, keep its poster value.
              - regex: '^.+poster: "(.+?)".+$'
                with: $1
driver:
  cookies:
    # A randomly valued "nats" cookie is set for each of the two domains.
    - CookieURL: "https://www.5kteens.com/"
      Cookies:
        - Name: "nats"
          Domain: ".5kteens.com"
          ValueRandom: 23
          Path: "/"
    - CookieURL: "https://www.5kporn.com/"
      Cookies:
        - Name: "nats"
          Domain: ".5kporn.com"
          ValueRandom: 23
          Path: "/"
# Last Updated April 09, 2021

View File

@@ -0,0 +1,9 @@
id: 5KTeens
name: 5Kteens
metadata: {}
version: 66d4760
date: "2021-07-18 15:48:20"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- 5KTeens.yml

View File

@@ -0,0 +1,22 @@
# XPath scraper for AD4X tour pages.
name: AD4X
sceneByURL:
  - action: scrapeXPath
    url:
      - ad4x.com/tour
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      Title: //title
      Details: //div[contains(@class,'description')]
      Date:
        selector: //span[@class='date']
        postProcess:
          # Layout is month/day/year.
          - parseDate: 01/02/2006
      Image: //video/@poster
      Performers:
        # Links that follow the "Starring" label.
        Name: //span[contains(text(),'Starring')]/following-sibling::a/text()
      Studio:
        Name:
          fixed: AD4X
# Last Updated March 27, 2024

View File

@@ -0,0 +1,9 @@
id: AD4X
name: AD4X
metadata: {}
version: 1fe074e
date: "2024-03-28 00:48:31"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AD4X.yml

View File

@@ -0,0 +1,116 @@
# XPath scrapers for AEBN performer, scene and movie pages. All three URL
# actions match on the same "aebn.com" pattern.
name: "AEBN"
performerByURL:
  - action: scrapeXPath
    url:
      - aebn.com
    scraper: performerScraper
sceneByURL:
  - action: scrapeXPath
    url:
      - aebn.com
    scraper: sceneScraper
movieByURL:
  - action: scrapeXPath
    url:
      - aebn.com
    scraper: movieScraper
xPathScrapers:
  performerScraper:
    performer:
      Name: //div[@class='dts-section-page-heading-title']/h1
      Gender:
        selector: //li[@class="section-detail-list-item-gender"]/text()
        postProcess:
          - map:
              TS: transgender_female
      URL: //link[@rel="canonical"]/@href
      Birthdate:
        selector: //div[@class='section-detail dts-list-attributes']/ul/li[contains(.,"Birth Date")]
        postProcess:
          - replace:
              # Keep the value after the "label: " prefix.
              - regex: .+:\s(.+)
                with: $1
              # Rewrite "Sept" to "Sep" to fit the "Jan" layout below.
              - regex: "Sept"
                with: "Sep"
          - parseDate: Jan 2, 2006
      Height:
        selector: //li[@class='section-detail-list-item-height']/text()
        postProcess:
          - replace:
              # Keep the number inside the parentheses.
              - regex: .+\((\d+).+\)
                with: $1
      Weight:
        selector: //li[@class='section-detail-list-item-weight']/text()
        postProcess:
          - replace:
              # Keep the number inside the parentheses.
              - regex: .+\((\d+).+\)
                with: $1
      EyeColor:
        selector: //li[@class='section-detail-list-item-eye-color']/text()
      Ethnicity:
        selector: //li[@class='section-detail-list-item-ethnicity']/text()
        postProcess:
          - map:
              White: Caucasian
      HairColor:
        selector: //li[@class='section-detail-list-item-hair-color']/text()
      Details:
        selector: //div[@class='dts-section-page-detail-description-body']
      Image:
        selector: //div[@class='dts-section-page-detail-main-image-wrapper']/picture/img/@src
        postProcess:
          - replace:
              # Drop the query string and prepend "https:" to the src.
              - regex: ^([^?]+).*$
                with: "https:$1"
  sceneScraper:
    scene:
      Title: //h1[@class="dts-section-page-heading-title"]|//div[@class="dts-section-page-heading-title"]/h1
      Date:
        selector: //li[@class="section-detail-list-item-release-date"]/text()
        postProcess:
          - replace:
              - regex: "Sept"
                with: "Sep"
          - parseDate: Jan 2, 2006
      Details:
        selector: //div[@class="dts-section-page-detail-description-body"]//text()
      Performers:
        Name: //div[@class="dts-star-name-overlay"]/text()
      Tags:
        Name: //span[@class="dts-image-display-name"]//text()
      Image:
        selector: //picture[@class="dts-movie-boxcover-front"]/img/@src
        postProcess:
          - replace:
              - regex: ^([^?]+).*$
                with: "https:$1"
  movieScraper:
    movie:
      Name: //h1[@class="dts-section-page-heading-title"]|//div[@class="dts-section-page-heading-title"]/h1
      Director:
        selector: //li[@class='section-detail-list-item-director']//span//a
        concat: ", "
      Duration: //li[@class='section-detail-list-item-duration'][contains(span,"Running Time")]/text()
      Date:
        selector: //li[@class="section-detail-list-item-release-date"]/text()
        postProcess:
          - replace:
              - regex: "Sept"
                with: "Sep"
          - parseDate: Jan 2, 2006
      Synopsis: //div[@class="dts-section-page-detail-description-body"]//text()
      Studio:
        Name: //div[@class='dts-studio-name-wrapper']/a/text()
      FrontImage:
        selector: //picture[@class="dts-movie-boxcover-front"]/img/@src
        postProcess:
          - replace:
              - regex: ^([^?]+).*$
                with: "https:$1"
      BackImage:
        selector: //picture[@class="dts-movie-boxcover-back"]/img/@src
        postProcess:
          - replace:
              - regex: ^([^?]+).*$
                with: "https:$1"
# Last Updated July 06, 2022

View File

@@ -0,0 +1,9 @@
id: AEBN
name: AEBN
metadata: {}
version: faf24f7
date: "2022-07-06 22:03:43"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AEBN.yml

View File

@@ -0,0 +1,89 @@
# This one covers a LOT of sites (including the PornPros Network). Almost
# all the sites follow the same structure, making this mega scraper possible.
# I got the list of sites from here: https://www.thebestporn.com/review_company.html?id=2085
# Note: The brands associated directly with the PornPros network, require you
# get a scene link from PornPros.com directly, as the individual sites do not
# have scene links. These sites are:
# publicviolations.com jurassiccock.com 18yearsold.com 40ozbounce.com
# cockcompetition.com crueltyparty.com cumshotsurprise.com deepthroatlove.com
# disgraced18.com eurohumpers.com flexiblepositions.com freaksofboobs.com
# freaksofcock.com pimpparade.com shadypi.com squirtdisgrace.com
# There are a small number of sites where the scene links don't exist on
# PornPros.com OR their own sites, and are not supported. These are:
# amateurviolations.com cumdisgrace.com lolcumshots.com massagecreep.com
# milfhumiliation.com webcamhackers.com
name: "AMA Multimedia"
sceneByURL:
  - action: scrapeXPath
    url:
      # Straight Porn
      - anal4k.com/video/
      - baeb.com/video/
      - bbcpie.com/video/
      - castingcouch-x.com/video/
      - creepypa.com/video/
      - cum4k.com/video/
      - exotic4k.com/video/
      - facials4k.com/video/
      - fantasyhd.com/video/
      - girlcum.com/video/
      - holed.com/video/
      - lubed.com/video/
      - mom4k.com/video/
      - momcum.com/video/
      - myveryfirsttime.com/video/
      - nannyspy.com/video/
      - passion-hd.com/video/
      - pornpros.com/video/
      - povd.com/video/
      - puremature.com/video/
      - spyfam.com/video/
      - tiny4k.com/video/
      - wetvr.com/video/
      # Gay Porn
      - bathhousebait.com/video/
      - boysdestroyed.com/video/
      - damnthatsbig.com/video/
      - gaycastings.com/video/
      - gaycreeps.com/video/
      - gayroom.com/video/
      - gayviolations.com/video/
      - manroyale.com/video/
      - massagebait.com/video/
      - menpov.com/video/
      - officecock.com/video/
      - outhim.com/video/
      - showerbait.com/video/
      - thickandbig.com/video/
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    common:
      # Root element that the scene selectors below are relative to.
      $scene: //div[@id="trailer_player"]
    scene:
      Title: $scene//h1
      Details: $scene//*[contains(@class, "fa-quote-right")]/following-sibling::span
      Performers:
        Name: $scene//a[contains(@href, "models")]
      Image:
        # Three alternative locations for the poster image.
        selector: >-
          $scene//video[@id="player"]/@poster |
          $scene//deo-video/@cover-image |
          $scene//img[@id="no-player-image"]/@src
        postProcess:
          - replace:
              # Strip trailing imgw=/imgh= size parameters. The empty
              # replacement is written explicitly rather than as a YAML null.
              - regex: ([?&]img[wh]=\d+)+$
                with: ""
      Studio:
        Name:
          selector: //p[contains(text(), "Copyright ©")]
          postProcess:
            - replace:
                # Keep the text between the 4-digit year and the next ".".
                - regex: .*?\d{4}\s+([^.]+).*
                  with: $1
# Last Updated March 22, 2024

View File

@@ -0,0 +1,9 @@
id: AMAMultimedia
name: AMA Multimedia
metadata: {}
version: 266340d
date: "2024-03-22 01:49:25"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AMAMultimedia.yml

View File

@@ -0,0 +1,37 @@
# XPath scraper for A POV Story trailer pages.
name: A POV Story
sceneByURL:
  - action: scrapeXPath
    url:
      - apovstory.com/trailers/
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      Studio:
        Name:
          fixed: "A POV Story"
      Title:
        selector: //div[@class = 'trailerArea centerwrap']/h3
      Details:
        selector: //div[@class = 'trailerContent']//*//text()
        concat: "\n\n"
        postProcess:
          - replace:
              # Remove the leading "Description:" label. The empty
              # replacement is written explicitly rather than as a YAML null.
              - regex: ^Description:\s*
                with: ""
      Tags:
        Name: //li/span[contains(text(),'CATEGORIES')]/parent::li//a//text()
      Performers:
        Name: //li/span[contains(text(),'FEATURING')]/parent::li//a//text()
      Image:
        selector: //div[@class="player-thumb"]/img/@src0_3x
        postProcess:
          - replace:
              # Prefix the site origin to the attribute value.
              - regex: ^
                with: "https://apovstory.com"
      Date:
        selector: //span[contains(text(),'RELEASE DATE')]/parent::li/text()
        postProcess:
          - parseDate: "January 2, 2006"
# Last Updated August 24, 2023

View File

@@ -0,0 +1,9 @@
id: APOVStory
name: A POV Story
metadata: {}
version: 729a760
date: "2023-08-26 15:01:05"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- APOVStory.yml

View File

@@ -0,0 +1,123 @@
# XPath scrapers for AShemaleTube: performer search and profile, scene search
# and scene page.
name: AShemaleTube
performerByName:
  action: scrapeXPath
  queryURL: "https://www.ashemaletube.com/models/?modelsearchSubmitCheck=FORM_SENDED&key=models&mode=model-search&searchName={}&submitModelSearch=Search&filterCountry=&filterHair=&filterEthnicity=&filterEyes=&filterPenis=&filterBreast=&mode=model-search"
  scraper: performerSearch
performerByURL:
  - action: scrapeXPath
    url:
      - ashemaletube.com/model/
    scraper: performerScraper
sceneByName:
  action: scrapeXPath
  queryURL: "https://www.ashemaletube.com/search/{}/?sort=re"
  scraper: sceneSearch
sceneByQueryFragment:
  action: scrapeXPath
  queryURL: "{url}"
  scraper: sceneScraper
sceneByURL:
  - action: scrapeXPath
    url:
      - ashemaletube.com/videos
    scraper: sceneScraper
xPathScrapers:
  performerSearch:
    common:
      $listAnchor: //div[@class="models-list"]//div[@class="modelspot modelItem"]/a
    performer:
      Name:
        selector: $listAnchor/@title
      URL:
        selector: $listAnchor/@href
        postProcess:
          - replace:
              # Prefix the site origin to the href.
              - regex: ^
                with: https://www.ashemaletube.com
  performerScraper:
    common:
      $infoPiece: //div[@class="info-box info"]
    performer:
      Name: //h1[@class="username "]
      # Each info field takes the second text node of the <li> whose span
      # holds the given label.
      Aliases: $infoPiece/ul/li/span[text()="AKA:"]/../text()[2]
      Birthdate:
        selector: $infoPiece/ul/li/span[text()="Date of Birth:"]/../text()[2]
        postProcess:
          - parseDate: "2 January 2006"
      DeathDate:
        selector: $infoPiece/ul/li/span[text()="Status:"]/../text()[2]
        postProcess:
          - replace:
              # Drop everything up to and including " on ".
              - regex: '^.*\son\s'
                with: ""
              # Drop ordinal suffixes (1st, 2nd, 3rd, 4th ...).
              - regex: (\d)(?:st|nd|rd|th)
                with: $1
          - parseDate: 2 January 2006
      Country: $infoPiece/ul/li/span[text()="Country:"]/../text()[2]
      EyeColor: $infoPiece/ul/li/span[text()="Eye Color:"]/../text()[2]
      HairColor: $infoPiece/ul/li/span[text()="Hair Color:"]/../text()[2]
      Ethnicity: $infoPiece/ul/li/span[text()="Ethnicity:"]/../text()[2]
      Height:
        selector: $infoPiece/ul/li/span[text()="Height:"]/../text()[2]
        postProcess:
          - replace:
              # Keep only the centimetre figure.
              - regex: (\d+) cm.*
                with: $1
      Tags:
        Name: //a[@class="tag-item"]/text()
      Image: //div[@class="user-photo"]/img/@src
      Twitter:
        selector: //a[@class=" social-twitter"]/@href
        postProcess:
          - replace:
              # Make the site-relative link absolute before following it.
              - regex: ^/
                with: https://www.ashemaletube.com/
          - subScraper: //link[@rel="alternate"][1]/@href
      URL: //link[@rel="canonical"]/@href
  sceneSearch:
    common:
      $scenerow: //div[@id="maincolumn"]/ul/li
    scene:
      Title: $scenerow//p/a/@title
      URL:
        selector: $scenerow//p/a/@href
        postProcess:
          - replace:
              - regex: ^
                with: https://www.ashemaletube.com
      Image: $scenerow/div/span/a/img/@src
  sceneScraper:
    common:
      $infoPiece: //div[@id="item-info"]
    scene:
      Title: $infoPiece//h1/text()
      Date:
        selector: $infoPiece//div[contains(@class, "views-count-add")]/text()
        postProcess:
          - replace:
              # Drop the "Added " label.
              - regex: Added\s(.*)
                with: $1
          - parseDate: 2006-01-02
      Tags:
        Name: //a[@class="tag-item"]/@title
      Performers:
        Name:
          selector: //a[@class="model-card"]/text()[2]
          postProcess:
            - replace:
                - regex: \n\t(.*)\n
                  with: $1
        URL:
          selector: //a[@class="model-card"]/@href
          postProcess:
            - replace:
                - regex: ^/
                  with: https://www.ashemaletube.com/
      Image: //meta[@property="og:image"]/@content
      URL: //meta[@property="og:url"]/@content
# Last Updated September 29, 2022

View File

@@ -0,0 +1,9 @@
id: AShemaleTube
name: AShemaleTube
metadata: {}
version: dbca26d
date: "2022-10-07 22:10:55"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AShemaleTube.yml

View File

@@ -0,0 +1,55 @@
# XPath scraper for ATK Exotics / AMKingdom tour pages.
name: ATKExotics
sceneByURL:
  - action: scrapeXPath
    url:
      - atkexotics.com/tour/
      - amkingdom.com/tour/
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      Studio:
        Name:
          # The 2257 statement link carries the site's domain name.
          selector: //div[@class='statement']/a[contains(@href,'2257')]/@href
          postProcess:
            - replace:
                # Keep what sits between "www." and ".com".
                - regex: (?:.+www\.)(.+)(?:\.com.+)
                  with: $1
            - map:
                atkexotics: "ATK Exotics"
                amkingdom: "AMKingdom"
      Performers:
        Name:
          selector: //h1
          postProcess:
            - replace:
                # Cut the heading at " Video". The empty replacement is
                # written explicitly rather than as a YAML null.
                - regex: \sVideo.*
                  with: ""
      Title: //title/text()
      Details:
        selector: //div/b[contains(text(),'Description:')]//following-sibling::text()[1]
      Tags:
        Name:
          selector: //div/b[contains(text(),'Tags:')]//following-sibling::text()[1]
          split: " , "
      Image:
        selector: //div[contains(@class,'video-wrap')]/div/@style
        postProcess:
          - replace:
              # Extract the URL from the inline background-image style.
              - regex: (?:background-image:url\(')(.+)(?:'\));?
                with: $1
driver:
  useCDP: true
  cookies:
    # A randomly valued __cfduid cookie is set for each of the two domains.
    - Cookies:
        - Name: __cfduid
          ValueRandom: 43
          Domain: .amkingdom.com
          Path: /
    - Cookies:
        - Name: __cfduid
          ValueRandom: 43
          Domain: .atkexotics.com
          Path: /
# Last Updated January 27, 2022

View File

@@ -0,0 +1,9 @@
id: ATKExotics
name: ATKExotics
metadata: {}
version: 1f1f042
date: "2022-01-27 19:05:53"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- ATKExotics.yml

View File

@@ -0,0 +1,70 @@
import json
import os
import re
import requests
import sys
import py_common.log as log
try:
from lxml import html
except ModuleNotFoundError:
log.error("You need to install the lxml module. (https://lxml.de/installation.html#installation)")
log.error("If you have pip (normally installed with python), run this command in a terminal (cmd): python -m pip install lxml")
sys.exit()
kgs_per_lb = 0.45359237
cms_per_in = 2.54
filename_pattern = re.compile(r"(?P<model_id>[a-z]{3}\d{3})ATK_(?P<movie_id>\d{6})(?P<scene>\d{3})_(?P<resolution>\w+)(?:\.(?P<extension>\w+))?", re.IGNORECASE)
def _profile_text(tree, label):
    """Return the stripped text that follows the bold `label` in the model profile box."""
    nodes = tree.xpath(
        f'//div[contains(@class, "model-profile-wrap")]/b[contains(text(), "{label}")]/following-sibling::text()'
    )
    return nodes[0].strip()


def getSceneByFilename(filename):
    """Build a scene dict for an ATK Girlfriends file name.

    The file name is matched against ``filename_pattern`` to obtain the model
    id and movie id; the model's tour page is then fetched and both the
    performer and the scene are read from that single page.

    Args:
        filename: text that ``filename_pattern`` matches from its start.

    Returns:
        dict with the keys ``studio``, ``performers``, ``title``, ``details``,
        ``url`` and ``image``.

    Raises:
        AttributeError: if ``filename`` does not match ``filename_pattern``.
        IndexError: if an expected element is missing from the fetched page.
    """
    # Parse filename
    filename_match = filename_pattern.match(filename)
    model_id = filename_match.group("model_id")
    movie_id = filename_match.group("movie_id")

    # Fetch model page
    model_url = f"https://www.atkgirlfriends.com/tour/model/{model_id}"
    log.debug(f"Fetching {model_url} ({movie_id})")
    session_cookies = dict(start_session_galleria='stash')
    # A timeout keeps the scraper from hanging forever on a stalled connection.
    request_timeout = 30
    response = requests.get(model_url, cookies=session_cookies, timeout=request_timeout)
    if response.url.startswith("https://www.atkgirlfriends.com?nats"):
        # Refetch page on cookie failure
        response = requests.get(model_url, cookies=session_cookies, timeout=request_timeout)

    # Build performer (lower-case keys throughout, "gender" included)
    tree = html.fromstring(response.text)
    performer = dict(gender="female")
    performer["name"] = tree.xpath('//h1[contains(@class, "page-title")]')[0].text
    performer["url"] = f"{model_url}/1/atk-girlfriends-{performer['name'].replace(' ', '-')}"
    performer["ethnicity"] = _profile_text(tree, "Ethnicity").capitalize()
    performer["hair_color"] = _profile_text(tree, "Hair Color").capitalize()

    # Height is given as feet and inches, separated by a ' or " character.
    (height_ft_str, height_ins_str) = re.findall(r"(\d+)[\"'](\d+)", _profile_text(tree, "Height"))[0]
    height_ins = float(height_ft_str) * 12 + float(height_ins_str)
    # Round to the nearest unit; truncating would bias every value downwards.
    performer["height"] = str(round(height_ins * cms_per_in))

    weight_lbs = float(re.findall(r"\d+", _profile_text(tree, "Weight"))[0])
    performer["weight"] = str(round(weight_lbs * kgs_per_lb))

    performer["measurements"] = _profile_text(tree, "Bust Size")
    performer["image"] = tree.xpath('//div[contains(@class, "model-profile-wrap")]/img/@src')[0]

    # Build scene
    scene = dict(studio=dict(name="ATK Girlfriends"), performers=[performer])
    # The movie's wrapper is four levels above the image whose src contains
    # "/<model_id>/<movie_id>".
    movie_wrap_xpath = f'//img[contains(@src, "/{model_id}/{movie_id}")]/../../../..'
    scene["title"] = tree.xpath(f'{movie_wrap_xpath}//h1')[0].text.strip()
    scene["details"] = tree.xpath(f'{movie_wrap_xpath}//b[contains(text(), "Description")]/following-sibling::text()')[0].strip()
    movie_url_relative = tree.xpath(f'{movie_wrap_xpath}//a/@href')[0]
    scene["url"] = f'https://www.atkgirlfriends.com{movie_url_relative}'
    scene["image"] = tree.xpath(f'{movie_wrap_xpath}//img/@src')[0]
    return scene
# Read all of stdin and look for an ATK-style file name anywhere in it.
# The text is held in `stdin_text` so the builtin `input` is not shadowed.
stdin_text = sys.stdin.read()
match = filename_pattern.search(stdin_text)
if match:
    scene = getSceneByFilename(match.group())
    output = json.dumps(scene)
    print(output)
else:
    # No recognisable file name: emit an empty JSON object.
    log.debug("Filename does not match ATKGirlfriends pattern")
    print(r"{}")

View File

@@ -0,0 +1,90 @@
# ATK Girlfriends: fragment scraping runs the Python script; performer and
# scene URLs are scraped with XPath.
name: ATK Girlfriends
# requires: py_common
sceneByFragment:
  action: script
  script:
    - python
    - ATKGirlfriends.py
performerByURL:
  - action: scrapeXPath
    url:
      # Trying to scrape without the www. prefix will result in a redirect loop
      - www.atkgirlfriends.com/tour/model/
    scraper: performerScraper
sceneByURL:
  - action: scrapeXPath
    url:
      # Trying to scrape without the www. prefix will result in a redirect loop
      - www.atkgirlfriends.com/tour/movie/
    scraper: sceneScraper
xPathScrapers:
  performerScraper:
    common:
      # Anchored so the scene scraper can reuse the same selector.
      $modelWrap: &modelWrap //div[contains(@class, "model-profile-wrap")]
    performer:
      Name: //h1[contains(@class, "page-title")]
      Gender:
        fixed: female
      Ethnicity:
        selector: $modelWrap/b[contains(text(), "Ethnicity")]/following-sibling::text()
      HairColor:
        selector: $modelWrap/b[contains(text(), "Hair Color")]/following-sibling::text()
        postProcess:
          - map:
              black: Black
              blond: Blonde
              brown: Brown
              red: Red
              white: White
      Height:
        selector: $modelWrap/b[contains(text(), "Height")]/following-sibling::text()
        postProcess:
          - feetToCm: true
      Weight:
        selector: $modelWrap/b[contains(text(), "Weight")]/following-sibling::text()
        postProcess:
          - replace:
              # Keep only the leading number before converting.
              - regex: (\d+).*
                with: $1
          - lbToKg: true
      Measurements:
        selector: $modelWrap/b[contains(text(), "Bust Size")]/following-sibling::text()
      Image:
        selector: $modelWrap/img/@src
  sceneScraper:
    common:
      $movieWrap: //div[contains(@class, "movie-wrap")]
      $modelWrap: *modelWrap
    scene:
      Title: //title
      Details: $movieWrap/b[contains(text(), "Description")]/following-sibling::text()
      Image:
        selector: //video/@poster | //div[@class="flowplayer minimalist is-splash"]/@style
        postProcess:
          - replace:
              # When the match is the style attribute, keep only its URL.
              - regex: background-image:url\('(.*)'\);
                with: $1
      Studio:
        Name:
          fixed: ATK Girlfriends
        URL:
          fixed: https://www.atkgirlfriends.com/
      Tags:
        Name:
          selector: $movieWrap/b[contains(text(), "Tags")]/following-sibling::text()
          postProcess:
            - replace:
                # Remove the whitespace around commas before splitting.
                - regex: \s*,\s*
                  with: ","
          split: ","
      Performers:
        Name: $modelWrap/text()[1]
driver:
  cookies:
    - CookieURL: https://www.atkgirlfriends.com
      Cookies:
        - Name: start_session_galleria
          Domain: www.atkgirlfriends.com
          Value: stash # Rotate this value if the scraper is blocked. The first request with the new value should fail.
          Path: /
# Last Updated March 14, 2024

View File

@@ -0,0 +1,10 @@
id: ATKGirlfriends
name: ATK Girlfriends
metadata: {}
version: 71e0b03
date: "2024-03-15 01:14:32"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- ATKGirlfriends.yml
- ATKGirlfriends.py

View File

@@ -0,0 +1,39 @@
# XPath scraper for the ATK Hairy / Petites / Premium tour pages.
name: ATKHairy
sceneByURL:
  - action: scrapeXPath
    url:
      - atkhairy.com/tour/
      - atkpetites.com/tour/
      - atkpremium.com/tour/
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      Studio:
        Name:
          # The login link's "join.<site>" host names the site.
          selector: //span[@class='login']/a/@href
          postProcess:
            - replace:
                - regex: .+join\.([^\.]+).+
                  with: $1
            - map:
                atkhairy: "ATK Natural & Hairy"
                atkpetites: "ATK Petites"
                atkpremium: "ATK Premium"
      Performers:
        Name: //div[@class='watchName']//text()
      Title: //title/text()
      Details: //div[contains(@class,'video-description')]//text()[not(contains(.,'Description'))]
      Tags:
        Name:
          selector: //div[@class='video-tags tour_box left']/text()
          split: " , "
      Image:
        selector: //div[@id='movie-poster']/@style
        postProcess:
          - replace:
              # Extract the quoted URL from the inline style.
              - regex: .+(?:url\(")(.+)(?:".+)
                with: $1
driver:
  useCDP: true
# Last Updated July 04, 2021

View File

@@ -0,0 +1,9 @@
id: ATKHairy
name: ATKHairy
metadata: {}
version: 66d4760
date: "2021-07-18 15:48:20"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- ATKHairy.yml

View File

@@ -0,0 +1,69 @@
# XPath scraper for AVEntertainments product pages, with a name search.
name: AVEntertainments
sceneByURL:
- action: scrapeXPath
url:
- aventertainments.com
scraper: sceneScraper
# Search by id is very picky; make sure to search using the full id, e.g. XXX-014.
# Choose one of the URLs below and put it in the 'queryURL:' line of sceneByName.
# Search JAV DVD:
# https://www.aventertainments.com/search_Products.aspx?languageID=1&dept_id=29&searchby=keyword&keyword={}
# Search JAV PPV:
# https://www.aventertainments.com/ppv/ppv_searchproducts.aspx?languageID=1&vodtypeid=1&keyword={}
# Search Western DVD:
# https://www.aventertainments.com/search_Products.aspx?languageID=1&dept_id=43&searchby=keyword&keyword={}
# Search Western PPV:
# https://www.aventertainments.com/ppv/ppv_searchproducts.aspx?languageID=1&vodtypeid=2&keyword={}
sceneByName:
action: scrapeXPath
queryURL: https://www.aventertainments.com/search_Products.aspx?languageID=1&dept_id=29&searchby=keyword&keyword={}
scraper: sceneSearch
sceneByQueryFragment:
action: scrapeXPath
queryURL: "{url}"
scraper: sceneScraper
xPathScrapers:
sceneSearch:
common:
$searchRes: //div[@class="single-slider-product grid-view-product"]
$searchTitle: //div[@class="single-slider-product grid-view-product"]//p[@class="product-title"]
scene:
Title: $searchTitle/a
URL: $searchTitle/a/@href
Image: $searchRes/div[@class="single-slider-product__image"]/a/img/@src
sceneScraper:
common:
$singleInfo: //div[@class="single-info"]
$performer: //div[@class="single-info"][contains(span,"Starring")]/span[@class="value"]/a
scene:
Title:
selector: $singleInfo[starts-with(span,"Item")]/span[@class="tag-title"]
Date:
selector: $singleInfo[contains(span,"Date")]/span[@class="value"]/text()
postProcess:
# Layout is month/day/year without zero padding.
- parseDate: 1/2/2006
Performers:
Name: $performer
URL: $performer/@href
Tags:
Name: $singleInfo[contains(span,"Category")]/span[@class="value-category"]/a
Details:
selector: //div[@class="section-title"]/h3|//div[contains(@class,"product-description")]//text()
concat: "\n\n"
postProcess:
- replace:
# Removes "(Blu-ray ...)" and "(Full HD)" markers from the text.
# NOTE(review): the top-level "|" splits this into two alternatives, so
# the leading \s* and the trailing capture group each apply to only one
# of them - confirm that is intended.
- regex: '\s*(?:\(Blu-.ay[^)]*\))|(?:\((?:Full|FULL)\s?HD\))\s*(\n\n|$)'
with: $1
# Removes the "...Read More" marker.
- regex: \.\.\.Read More(\n\n)?
with:
Image:
selector: //span[@class="grid-gallery"]/a/@href|//div[@id="PlayerCover"]/img/@src
Studio:
Name: $singleInfo[contains(span,"Studio")]/span[@class="value"]/a
# NOTE(review): with indentation lost it is unclear whether this URL sits at
# scene level or under Studio - confirm against the indented original.
URL: //link[@rel="canonical"]/@href
# Last Updated November 19, 2021

View File

@@ -0,0 +1,9 @@
id: AVE
name: AVEntertainments
metadata: {}
version: e29dc4c
date: "2021-11-23 20:35:48"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AVE.yml

View File

@@ -0,0 +1,168 @@
# Routing for avjiali.com: scene and performer pages by URL, plus a
# name search whose hits are resolved through sceneByQueryFragment.
name: "AV Jiali"
sceneByURL:
  - action: scrapeXPath
    url:
      - avjiali.com
    scraper: sceneScraper
performerByURL:
  - action: scrapeXPath
    url:
      - avjiali.com
    scraper: performerScraper
sceneByName:
  action: scrapeXPath
  # {} is replaced by the search term
  queryURL: https://avjiali.com/?s={}
  scraper: sceneSearch
sceneByQueryFragment:
  action: scrapeXPath
  # Loads the URL of the search result that was picked
  queryURL: "{url}"
  scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
Title:
selector: //h5/a
Date:
selector: //div[@class="video-date"]/text()
postProcess:
- replace:
- regex: (\d+)(st|nd|rd|th)
with: "$1"
- parseDate: January 02, 2006
Performers:
Name: //div[@class="cat"][1]/a[starts-with(@href, 'https://avjiali.com/model')]
URL: //div[@class="cat"][1]/a/@href
Tags:
Name: //div[@class="cat"][2]/a
Details:
selector: //p[span[@class="readmore"]]
postProcess:
- replace:
- regex: ... Read More
with:
Image:
selector: //video[@id="videohtml5tour"]/@poster
postProcess:
- replace:
- regex: ^//
with: "https://"
URL: //link[@rel="canonical"]/@href
Studio:
Name:
fixed: AV Jiali
  # Performer profile scraper. Every biographical field is a
  # <div class="model-profile"> row chosen by its <strong> label; the first
  # replace strips the label, later ones normalise or blank the value.
  performerScraper:
    common:
      $profile: //div[@class="model-profile"]
    performer:
      Name:
        selector: $profile[contains(strong, "Name:")]//text()
        postProcess:
          - replace:
              - regex: .*Name:\s*(.*)\s*$
                with: $1
              # Blank out the literal placeholder "None"
              - regex: None
                with:
      Aliases:
        selector: $profile[contains(strong, "Chinese name:")]//text()
        postProcess:
          - replace:
              - regex: .*Chinese name:\s*(.*)\s*$
                with: $1
              - regex: None
                with:
      Birthdate:
        selector: $profile[contains(strong, "Birth date:")]//text()
        postProcess:
          - replace:
              - regex: .*Birth date:\s*(.*)\s*$
                with: $1
          # Two layouts are tried in sequence.
          # NOTE(review): presumably a failed parse passes the value through
          # unchanged so the second layout acts as a fallback - confirm.
          - parseDate: January 2, 2006
          - parseDate: 2006-01-02
      Measurements:
        selector: $profile[contains(strong, "Body:")]//text()
        postProcess:
          - replace:
              - regex: .*Body:\s*(.*)\s*$
                with: $1
              # "B86-W60-H88" -> "86-60-88"
              - regex: B(\d*)-W(\d*)-H(\d*)
                with: $1-$2-$3
              # Empty measurements collapse to "--"; remove that
              - regex: --
                with:
              - regex: None
                with:
      Weight:
        selector: $profile[contains(strong, "Weight:")]//text()
        postProcess:
          - replace:
              - regex: .*Weight:\s*(.*)\s*$
                with: $1
              # Drop the "kg" unit, keep the number
              - regex: (\d+)kg
                with: $1
              - regex: None
                with:
      Height:
        selector: $profile[contains(strong, "Height:")]//text()
        postProcess:
          - replace:
              - regex: .*Height:\s*(.*)\s*$
                with: $1
              # Keep only the centimetre figure, discard anything after "cm"
              - regex: (\d+)cm.*
                with: $1
              - regex: None
                with:
      Details:
        selector: //p[span[@class="readmore"]]
        postProcess:
          - replace:
              # Remove the "Read More" expander and the three characters before it
              - regex: ... Read More
                with:
      Piercings:
        selector: $profile[contains(strong, "Piercings:")]//text()
        postProcess:
          - replace:
              - regex: .*Piercings:\s*(.*)\s*$
                with: $1
              # Treat "None", a bare "No" and "No Piercing..." as empty
              - regex: None|^No$|No Piercing.*
                with:
      Tattoos:
        selector: $profile[contains(strong, "Tattoo:")]//text()
        postProcess:
          - replace:
              - regex: .*Tattoo:\s*(.*)\s*$
                with: $1
              # Treat "None", a bare "No" and "No Tattoo..." as empty
              - regex: None|^No$|No Tattoo.*
                with:
      HairColor:
        selector: $profile[contains(strong, "Hair color:")]//text()
        postProcess:
          - replace:
              # Case-insensitive label match with an optional colon
              - regex: (?i).*hair color:?\s*(.*)\s*$
                with: $1
              - regex: None
                with:
      URL: //link[@rel="canonical"][1]/@href
      Image:
        selector: //div[@class="model-photo"]/img[@class="rounded"]/@src
        postProcess:
          - replace:
              # Protocol-relative URL -> absolute https URL
              - regex: ^//
                with: https://
              # Swap the 460x640 size token in the URL for 690x960
              - regex: 460x640
                with: 690x960
      # The remaining fields are constants, not scraped from the page
      Ethnicity:
        fixed: Asian
      Gender:
        fixed: Female
      Country:
        fixed: Taiwan
sceneSearch:
common:
$videos: //div[@class="thumb"]/a
scene:
Title: $videos/@title
URL: $videos/@href
Image: $videos/img/@src
Studio:
Name:
fixed: AVJiali
# Last Updated November 07, 2023

View File

@@ -0,0 +1,9 @@
id: AVJiali
name: AV Jiali
metadata: {}
version: 6b73583
date: "2023-11-07 19:19:47"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AVJiali.yml

View File

@@ -0,0 +1,30 @@
name: "Abby Winters"
# Scene pages only; any abbywinters.com URL is handled by sceneScraper.
sceneByURL:
  - scraper: sceneScraper
    action: scrapeXPath
    url:
      - abbywinters.com/
xPathScrapers:
  sceneScraper:
    scene:
      Title: //div[starts-with(@class,'page-heading')]/div/h1/text()
      Date:
        # Table cell next to the "Release date" header
        selector: //th[text()='Release date']/following-sibling::td/text()
        postProcess:
          - parseDate: 02 Jan 2006
      Details:
        # Text nodes are joined with single spaces
        concat: " "
        selector: //div[@class[contains(.,'description')]]//text()
      Image: //div[@class='video-player-container video-responsive']/@data-poster|//img[@class="img-responsive"]/@src
      Performers:
        Name: //td/a[contains(@href,'/nude_girl/')]/text()
      Tags:
        Name: //p//a[contains(@href,'/fetish/') or contains(@href,'/browse') or contains(@href,'/amateurs/')]/text()
      Studio:
        Name:
          fixed: AbbyWinters
# Last Updated April 02, 2024

View File

@@ -0,0 +1,9 @@
id: AbbyWinters
name: Abby Winters
metadata: {}
version: df04151
date: "2024-04-02 22:19:57"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AbbyWinters.yml

View File

@@ -0,0 +1,44 @@
name: AbuseMe
# Scene pages under abuseme.com/video are scraped with XPath.
sceneByURL:
  - action: scrapeXPath
    url:
      - abuseme.com/video
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      Title: //h1[@class="shoot-title"]/text()
      Date:
        selector: //div[@class="playerHdr_rgt"]/text()
        postProcess:
          - replace:
              # Drop the leading "<label>: " and keep the date text
              - regex: (\w+:\s)(.+)
                with: $2
          - parseDate: Jan 2, 2006
      Details: //div[@class="playerTxt"]
      Tags:
        Name:
          selector: //meta[@http-equiv="keywords"]/@content
          split: ","
          # Bad way to get tags
      Performers:
        Name:
          # Names are read from the inline script that sets bbGaParams.shootModels
          selector: //script[contains(.,"bbGaParams.shootModels")]/text()
          postProcess:
            - replace:
                # Keep only the quoted entries inside the [...] array
                - regex: .+\[(".+").\].+
                  with: $1
                # "<id> : <name>" -> <name>
                - regex: ("\d+\s:\s)([^"]+)(")
                  with: $2
          split: ","
      Studio:
        Name:
          fixed: AbuseMe
      Image:
        selector: //img[@class="playerPic"]/@src
        postProcess:
          - replace:
              # Protocol-relative "//host/path" -> "https://host/path".
              # The replacement must restore the two slashes the pattern
              # consumes, otherwise the result is "https:host/path".
              - regex: ^\/\/
                with: "https://"
# Last Updated August 03, 2020

View File

@@ -0,0 +1,9 @@
id: AbuseMe
name: AbuseMe
metadata: {}
version: 2d7046f
date: "2020-09-23 21:14:47"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AbuseMe.yml

View File

@@ -0,0 +1,31 @@
# requires: Algolia
# Every hook below runs the shared ../Algolia/Algolia.py script with the
# site key "activeduty"; a trailing argument, where present, selects a mode.
name: "Active Duty"
sceneByURL:
  - action: script
    url:
      - activeduty.com/en/video
    script:
      - python
      - ../Algolia/Algolia.py
      - activeduty
sceneByFragment:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - activeduty
sceneByName:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - activeduty
    # NOTE(review): mode argument; presumably a free-text title search - see Algolia.py
    - searchName
sceneByQueryFragment:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - activeduty
    # NOTE(review): mode argument; meaning is defined in Algolia.py
    - validName
# Last Updated September 26, 2023

View File

@@ -0,0 +1,9 @@
id: ActiveDuty
name: Active Duty
metadata: {}
version: 97bec71
date: "2023-11-22 00:53:44"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- ActiveDuty.yml

View File

@@ -0,0 +1,40 @@
# requires: Algolia
# Every hook below runs the shared ../Algolia/Algolia.py script with the
# site key "addicted2girls"; a trailing argument, where present, selects a mode.
name: "addicted 2 Girls"
sceneByURL:
  - action: script
    url:
      - addicted2girls.com/en/video
    script:
      - python
      - ../Algolia/Algolia.py
      - addicted2girls
sceneByFragment:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - addicted2girls
sceneByName:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - addicted2girls
    # NOTE(review): mode argument; presumably a free-text title search - see Algolia.py
    - searchName
sceneByQueryFragment:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - addicted2girls
    # NOTE(review): mode argument; meaning is defined in Algolia.py
    - validName
galleryByURL:
  - action: script
    url:
      - addicted2girls.com/en/photo/
    script:
      - python
      - ../Algolia/Algolia.py
      - addicted2girls
      # Mode argument used for the gallery hook
      - gallery
# Last Updated December 22, 2022

View File

@@ -0,0 +1,9 @@
id: AddictedToGirls
name: addicted 2 Girls
metadata: {}
version: 97bec71
date: "2023-11-22 00:53:44"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AddictedToGirls.yml

View File

@@ -0,0 +1,69 @@
name: AdultDoorway
# One XPath scraper serves every listed site; all URLs share the /tour/ prefix.
sceneByURL:
  - action: scrapeXPath
    url:
      - adultdoorway.com/tour/
      - amateurthroats.com/tour/
      - analrecruiters.com/tour/
      - blackonblackcrime.com/tour/
      - bustyamateurboobs.com/tour/
      - clubamberrayne.com/tour/
      - ebonycumdumps.com/tour/
      - facialabuse.com/tour/
      - fuckmepov.com/tour/
      - ghettodoorway.com/tour/
      - ghettogaggers.com/tour/
      - hardcoredoorway.com/tour/
      - hugerubberdicks.com/tour/
      - joethepervert.com/tour/
      - latinaabuse.com/tour/
      - monstercockmadness.com/tour/
      - nastylittlefacials.com/tour/
      - pinkkittygirls.com/tour/
      - pornaddict.com/tour/
      - povhotel.com/tour/
      - sexysuckjobs.com/tour/
      - spermsuckers.com/tour/
      - thehandjobsite.com/tour/
      - thepantyhosesite.com/tour/
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    common:
      # Trailing space in "update-info " is part of the match
      $scene: //div[contains(@class,"update-info ")]
    scene:
      Title:
        selector: $scene//h1
      Details: //div[@class="update-info-block"]/div[@class="update-info-block"][not(ul)]
      Date:
        # First text node after the <strong> containing "Added"
        selector: ($scene//strong[contains(text(),'Added')]/following-sibling::text())[1]
        postProcess:
          - replace:
              # Keep only the part before the first "|"
              - regex: ^([^\|]+).*
                with: $1
          - parseDate: January 2, 2006
      # The <base href> and the player image attributes are concatenated with
      # __SEP__, then the regexes below reduce that string to one image URL.
      Image:
        selector: >
          //base/@href
          | //div[@id='fakeplayer']//img/@src0_1x
          | //div[@id='fakeplayer']//img/@src0_2x
          | //div[@id='fakeplayer']//img/@src0_3x
          | //div[@id='fakeplayer']//img/@src0_4x
        concat: __SEP__
        postProcess:
          - replace:
              # "<base up to .com>...__SEP__<path>" -> "<base up to .com><path>"
              - regex: ^(.+(\.com)).+__SEP__(.+)$
                with: $1$3
              # If an absolute https URL is embedded, keep just that URL
              - regex: ^.+(https:.+)$
                with: $1
      Tags:
        Name: $scene//ul[@class='tags']/li/a/text()
      Studio:
        Name:
          selector: //div[@class="update-info-block"]//span[contains(@class,"bg-secondary")]/text()
          postProcess:
            - replace:
                # Trim trailing non-alphanumeric characters
                - regex: "[^a-zA-Z0-9]+$"
                  with: ""
# Last Updated March 28, 2024

View File

@@ -0,0 +1,9 @@
id: AdultDoorway
name: AdultDoorway
metadata: {}
version: 2ba82f0
date: "2024-03-29 01:34:58"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AdultDoorway.yml

View File

@@ -0,0 +1,73 @@
name: AdultDvdMarketPlace
# DVD pages can be scraped either as a movie or as a scene; title search
# hits are resolved through sceneByQueryFragment.
movieByURL:
  - action: scrapeXPath
    url:
      - adultdvdmarketplace.com/xcart/adult_dvd/dvd_view.php?adult_dvd_id=
      - adultdvdmarketplace.com/dvd_view
    scraper: movieScraper
sceneByName:
  action: scrapeXPath
  # {} is replaced by the search term (title search)
  queryURL: https://www.adultdvdmarketplace.com/xcart/adult_dvd/dvd_search.php?type=title&search={}
  scraper: sceneSearch
sceneByURL:
  - action: scrapeXPath
    url:
      - adultdvdmarketplace.com/dvd_view
    scraper: sceneScraper
sceneByQueryFragment:
  action: scrapeXPath
  queryURL: "{url}"
  scraper: sceneScraper
xPathScrapers:
movieScraper:
movie:
Name: //meta[@property="og:title"]/@content
Director: //span[contains(text(),"Director")]/following-sibling::a/text()
Date:
selector: //span[contains(text(),"Released")]/following-sibling::text()
postProcess:
- parseDate: 01/2006
Synopsis: //h3[contains(text(), "Description")]/following-sibling::p
Studio:
Name: //span[@itemprop="brand"]/text()
FrontImage: //strong[contains(text(),"Large Front")]/parent::a/@href
BackImage: //strong[contains(text(),"Large Back")]/parent::a/@href
sceneSearch:
common:
$selection: //div[contains(concat(' ',normalize-space(@class),' '),' product-col ')]
scene:
Title: $selection//h4/a
URL:
selector: $selection//h4/a/@href
postProcess:
- replace:
- regex: ^
with: https://www.adultdvdmarketplace.com
Image: $selection//a/img/@src
sceneScraper:
scene:
Title: //meta[@property="og:title"]/@content
Details: //h3[contains(text(), "Description")]/following-sibling::p
Date:
selector: //span[contains(text(),"Released")]/following-sibling::text()
postProcess:
- parseDate: 01/2006
Image: //strong[contains(text(),"Large Front")]/parent::a/@href
Studio:
Name: //span[@itemprop="brand"]/text()
Movies:
Name: //meta[@property="og:title"]/@content
URL: //meta[@property="og:url"]/@content
Performers:
Name: //h3[text()="Cast"]/following-sibling::a
Tags:
Name: //span[text()="Category:"]/following-sibling::a
# Last Updated September 07, 2020

View File

@@ -0,0 +1,9 @@
id: AdultDvdMarketPlace
name: AdultDvdMarketPlace
metadata: {}
version: caf2433
date: "2023-08-04 00:38:00"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AdultDvdMarketPlace.yml

View File

@@ -0,0 +1,205 @@
name: AdultEmpire
# Scene URLs: "/clip" pages have their own scraper and are listed before the
# catch-all entry for the same two domains.
# NOTE(review): presumably the first matching entry wins - confirm.
sceneByURL:
  - action: scrapeXPath
    url:
      - adultdvdempire.com/clip
      - adultempire.com/clip
    scraper: sceneScraperSingle
  - action: scrapeXPath
    url:
      - adultdvdempire.com
      - adultempire.com
    scraper: sceneScraper
movieByURL:
  - action: scrapeXPath
    url:
      - adultdvdempire.com
      - adultempire.com
    scraper: movieScraper
performerByName:
  action: scrapeXPath
  # {} is replaced by the performer name being searched
  queryURL: https://www.adultempire.com/performer/search?q={}
  scraper: performerSearch
performerByURL:
  - action: scrapeXPath
    url:
      - adultempire.com
      - adultdvdempire.com
    scraper: performerScraper
sceneByName:
  action: scrapeXPath
  # if needed replace `dvd` with `vod`
  queryURL: https://www.adultdvdempire.com/dvd/search?q={}
  scraper: sceneSearch
sceneByQueryFragment:
  action: scrapeXPath
  # Loads the URL of the search result that was picked
  queryURL: "{url}"
  scraper: sceneScraper
xPathScrapers:
performerSearch:
common:
$perfomerRoot: //div[@id="performerlist"]//a
performer:
Name: $perfomerRoot/@label
URL:
selector: $perfomerRoot/@href
postProcess:
- replace:
- regex: ^
with: https://www.adultempire.com
  # DVD search results.
  sceneSearch:
    scene:
      Title:
        # Selects both the href and title attributes of each result link,
        # joins everything with "|", rewrites each "/<id>/<slug>|<title>" pair
        # to "<title> (<id>)", then splits back into one title per result.
        selector: //div[@class="item-title"]/a/@*[local-name()="href" or local-name()="title"]
        concat: "|"
        postProcess:
          - replace:
              - regex: '/(\d+)/[^|]+\|([^|]+)'
                with: "$2 ($1)"
        split: "|"
      URL:
        selector: //div[@id="content"]//div[@class="item-title"]/a/@href
        postProcess:
          - replace:
              # Prefix the scraped href with the site origin
              - regex: ^
                with: "https://www.adultdvdempire.com"
      Image:
        selector: //a[@class="boxcover"]/img/@data-src
  # DVD/VOD product page read as a movie.
  movieScraper:
    movie:
      Name: //h1/text()
      Director: //a[@label="Director"]/text()
      Duration:
        selector: //small[contains(text(), "Length")]/following-sibling::text()
        postProcess:
          - replace:
              # "H hrs. M mins." -> "H:M:00"
              - regex: " hrs. "
                with: ":"
              - regex: " mins."
                with: ":00"
      Date:
        selector: //small[contains(text(), "Released")]/following-sibling::text()
        postProcess:
          - parseDate: Jan 02 2006
      Synopsis:
        # All text nodes of the synopsis, joined with single spaces
        selector: //div[contains(@class,"synopsis-content")]//text()
        concat: " "
      Studio:
        Name: //a[@label="Studio"]/text()
      # Front cover is read from data-href, back cover from href
      FrontImage: //a[@id="front-cover"]/@data-href
      BackImage: //a[@id="back-cover"]/@href
      # Rating is not yet implemented in the UX
      # Rating:
      #   selector: //span[@class='rating-stars-avg']/text()
      #   postProcess:
      #     - replace:
      #       - regex: (\d).+
      #         with: $1
      URL: //meta[@name='og:url']/@content
sceneScraperSingle:
common:
$header: //div[@class="clip-page__detail__title text-display-primary"]
$clipId: (//*[@data-tid]/@data-tid)[1]
scene:
Title: $header/h1
Studio:
Name: //div[contains(text(), "by")]/a
Movies:
Name: //div[contains(text(), "from")]/a
Date:
selector: //strong[contains(text(), "Released")]/following-sibling::text()
postProcess:
- parseDate: Jan 02 2006
Tags:
Name: //strong[contains(text(), "Attributes")]/following-sibling::a/text()
Performers:
Name: //strong[contains(text(), "Starring")]/following-sibling::a/text()
URL:
selector: //strong[contains(text(), "Starring")]/following-sibling::a/@href
postProcess:
- replace:
- regex: ^
with: "https://www.adultdvdempire.com"
Image:
selector: (//*[@data-tid]/@data-tid)[1] | //a[@id="front-cover"]/img/@src
concat: __SEPARATOR__
postProcess:
- replace:
- regex: (\d+).*/([^/]*\d+)[^/\d]*$
with: https://imgs1cdn.adultempire.com/backdrop/6000/$2%5f$1/scene-1.jpg
  # DVD/VOD product page read as a scene; the product itself is also attached
  # as the scene's movie (same <h1> title, canonical link as URL).
  sceneScraper:
    scene:
      Title: //h1/text()
      Details:
        # All text nodes of the synopsis, joined with single spaces
        selector: //div[contains(@class,"synopsis-content")]//text()
        concat: " "
      Date:
        selector: //small[contains(text(), "Released")]/following-sibling::text()
        postProcess:
          - parseDate: Jan 02 2006
      Director: //a[@label="Director"]/text()
      Image: //a[@id="front-cover"]/@data-href
      Studio:
        Name: //a[@label="Studio"]/text()
      Movies:
        Name: //h1/text()
        URL: //link[@rel="canonical"]/@href
      Tags:
        Name: //div[h2[contains(.,'Categories')]]//a[@label="Category"]/text()
      Performers:
        Name: //a[@label="Performer"]//text()
        URL:
          selector: //a[@label="Performer"]/@href
          postProcess:
            - replace:
                # Prefix the scraped href with the site origin
                - regex: ^
                  with: "https://www.adultdvdempire.com"
      URL: //meta[@name='og:url']/@content
performerScraper:
common:
$infoPiece: //*[@id="profileModal"]/div/div/div[2]/div[1]/ul
performer:
Name: //*[@id="content"]/section/div/div[2]/h1/text()
Birthdate:
selector: $infoPiece/li[contains(text(), 'Born:')]/text()
postProcess:
- replace:
- regex: Born:\s+(.*)
with: $1
Height:
selector: $infoPiece/li[contains(text(), 'Height:')]/text()
postProcess:
- replace:
- regex: Height:\s+(.*)
with: $1
- feetToCm: true
Image: //*[@id="content"]/section/div/div[1]/a[1]|/img/@src|//*[@id="content"]/section/div/div[1]/img/@src|//section[@class="container-fluid"]//a[@class="fancy"][@label="Headshot"]/@href
Country:
selector: $infoPiecel/li[contains(text(), 'From:')]/text()
postProcess:
- replace:
- regex: From:\s+(.*)
with: $1
Measurements:
selector: $infoPiece/li[contains(text(), 'Measurements:')]/text()
postProcess:
- replace:
- regex: Measurements:\s+(\d\d\w*)\D+(\d+)\D+(\d+).*
with: $1-$2-$3
Aliases:
selector: //*[@id="content"]/section/div/div[2]/div[contains(text(), "Alias:")]
concat: ", "
postProcess:
- replace:
- regex: "Alias: (.*)"
with: $1
Details: //*[@id="content"]/section/div/div[5]/aside/text()
URL: //link[@rel='canonical']/@href
# Last Updated February 22, 2024

View File

@@ -0,0 +1,9 @@
id: AdultEmpire
name: AdultEmpire
metadata: {}
version: 24e992f
date: "2024-02-29 00:20:17"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AdultEmpire.yml

View File

@@ -0,0 +1,160 @@
name: AdultEmpireCash
# Sites ("Brands") from https://www.adultempirecash.com
# Regular sites and VR sites use different scene scrapers; a subset of the
# regular sites is also exposed for movie scraping.
sceneByURL:
  - action: scrapeXPath
    scraper: sceneScraper
    url:
      - blackmassivecocks.com/
      - braziliantransgirls.com/
      - bruthasinc.com/
      - concoxxxion.com/
      - elegantangel.com/
      - forbiddenfruitsfilms.com/
      - hornyhousehold.com/
      - hotwifefun.com/
      - hotwivescheating.com/
      - jayspov.net/
      - jodiwest.com/
      - kingsoffetish.com/
      - latinoguysporn.com/
      - lethalhardcore.com/
      - pornstarstroker.com/ # aggregator site?
      - reaganfoxx.com/
      - severesexfilms.com/
      - smutfactor.com/
      - spankmonster.com/
      - stephousexxx.com/
      - thirdworldxxx.com/
      - transationalfantasies.com/
      - wcpclub.com/
      - westcoastproductions.com/
      - data18.empirestores.co/
  # VR Sites
  - action: scrapeXPath
    scraper: vrSceneScraper
    url:
      - lethalhardcorevr.com/
      - whorecraftvr.com/
movieByURL:
  - action: scrapeXPath
    scraper: movieScraper
    url:
      - braziliantransgirls.com/
      # - bangmytransass.com/ # Scenes published as movies?
      # - blacktranswhackers.com/ # Scenes published as movies?
      - concoxxxion.com/
      - elegantangel.com/
      - filthykings.com/
      - forbiddenfruitsfilms.com/
      - lethalhardcore.com/
      - severesexfilms.com/
      # - shemalestrokers.com/ # Scenes published as movies?
      - thirdworldxxx.com/
      - transationalfantasies.com/
      - wcpclub.com/
      - westcoastproductions.com/
      - data18.empirestores.co/
xPathScrapers:
  # Scene page scraper for the non-VR sites.
  sceneScraper:
    scene:
      Title: //h1[@class="description"]/text()
      Date:
        # First of the "release-date" blocks
        selector: //div[@class="release-date"][1]/text()
        postProcess:
          - parseDate: Jan 02, 2006
      Details:
        selector: //div[@class="synopsis"]//text()
        concat: "\n\n"
      Tags:
        # Tags and categories are merged into one list
        Name: //div[@class="tags"]/a/text()|//div[@class="categories"]/a/text()
      Performers:
        Name: //div[@class="video-performer"]/a//text()
      Studio:
        Name:
          selector: //div[@class="studio"]/span[contains(text(),"Studio:")]/following-sibling::*
          postProcess:
            # Override studio name only when needed
            - map:
                Jay Rock Clips: Jay's POV
            - replace:
                # 'Jane Doe Clips' => 'Jane Doe'
                - regex: \sClips$
                  with:
      Image: //link[@rel="image_src"]/@href
      Movies:
        Name: //h1[@class="description"]/following-sibling::p/a/text()
      Director:
        selector: //div[@class="director"]/text()
        postProcess:
          - replace:
              # Collapse whitespace runs and tidy " ," into ","
              - regex: \s{2,}
                with: " "
              - regex: " ,"
                with: ","
      Code:
        selector: //meta[@name="og:url"]/@content
        postProcess:
          - replace:
              # Numeric first path segment of the URL becomes the code...
              - regex: ^https?://[^/]+/(\d+)/.+
                with: $1
              # ...and a URL that did not match that shape is blanked
              - regex: ^http.*
                with:
movieScraper:
movie:
Name: //h1[@class="description"]/text()
Date:
selector: //div[@class="release-date"][1]/text()
postProcess:
- parseDate: Jan 02, 2006
Synopsis:
selector: //div[@class="synopsis"]//text()
concat: "\n\n"
Director: //div[@class="director"]/a/text()
Duration:
selector: //div[@class="release-date"][2]/text()
postProcess:
- replace:
- regex: " hrs. "
with: ":"
- regex: " mins."
with: ":00"
Studio:
Name:
selector: //div[@class="studio"]/a
postProcess:
- replace:
# 'Jane Doe Clips' => 'Jane Doe'
- regex: \sClips$
with:
FrontImage: //link[@rel="image_src"]/@href
BackImage: //div[@class="carousel-item"]/img/@data-src
  # Scene page scraper for the VR sites.
  vrSceneScraper:
    common:
      $content: //div[@class="item-page-details"]
    scene:
      Title: $content//h1
      Details:
        selector: $content//div[@id="synopsis-full"]
        concat: "\n\n"
      Performers:
        Name:
          selector: $content//span[@class="overlay-inner"]//small
          postProcess:
            - replace:
                # Strip the surrounding parentheses
                - regex: \((.+)\)
                  with: $1
      Image:
        selector: //link[@rel="image_src"]/@href
        postProcess:
          - replace:
              # Take the digits before "h" in the last path segment and build
              # the large product image URL from them
              - regex: ^.+/(\d+)h.+$
                with: https://imgs1cdn.adultempire.com/product/${1}_lg.jpg
      Studio:
        Name: //meta[@name="og:site_name"]/@content
# Last Updated March 19, 2024

View File

@@ -0,0 +1,9 @@
id: AdultEmpireCash
name: AdultEmpireCash
metadata: {}
version: e7be479
date: "2024-03-29 14:39:33"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AdultEmpireCash.yml

View File

@@ -0,0 +1,86 @@
name: AdultFilmIndex
# All hooks call the site's JSON API under /api/v1/stash/.
sceneByURL:
  - action: scrapeJson
    url:
      - https://adultfilmindex.com/movie/
    scraper: sceneScraper
    queryURL: https://adultfilmindex.com/api/v1/stash/scene/{url}
    queryURLReplace:
      url:
        # Reduce ".../movie/<a>/<b>/scene/<c>" to its last segment <c>
        - regex: '.+/movie/([^/]+)/([^/]+)/scene/([^/]+)$'
          with: "${3}"
sceneByName:
  action: scrapeJson
  queryURL: https://adultfilmindex.com/api/v1/stash/scene_search/{}
  scraper: sceneSearch
sceneByQueryFragment:
  action: scrapeJson
  # Loads the URL returned by the search result that was picked
  queryURL: "{url}"
  scraper: sceneScraper
sceneByFragment:
  action: scrapeJson
  # Lookup keyed on the file name and oshash of the scene's file
  queryURL: https://adultfilmindex.com/api/v1/stash/scene_fragment_search/{filename}/{oshash}
  scraper: sceneScraper
movieByURL:
  - action: scrapeJson
    url:
      - https://adultfilmindex.com/movie/
    scraper: movieScraper
    queryURL: https://adultfilmindex.com/api/v1/stash/movie/{url}
    queryURLReplace:
      url:
        # Reduce ".../movie/<a>/<rest>" to the first segment <a>
        - regex: '.+/movie/([^/]+)/.+$'
          with: "${1}"
jsonScrapers:
sceneSearch:
scene:
Title: data.#.title
Date: data.#.release_date
Image: data.#.thumbnail
URL: data.#.url
Details: data.#.description
  # Single-scene response; movie-level data is nested under data.movie.
  sceneScraper:
    scene:
      Title: data.title
      Details: data.description
      URL: data.url
      Image: data.thumbnail
      Date:
        # The scene date is taken from the parent movie's release date
        selector: data.movie.release_date
        postProcess:
          - replace:
              # Cut everything from the first "T" on, leaving the date part
              - regex: T.*$
                with: ""
          - parseDate: 2006-01-02
      Movies:
        Name: data.movie.title
      Performers:
        Name: data.actors.#.name
      Studio:
        Name: data.movie.studio.name
      Tags:
        Name: data.tags.#.name
  # Movie response mapped field-for-field from the "data" object.
  movieScraper:
    movie:
      Name: data.title
      Synopsis: data.description
      URL: data.url
      Duration: data.runtime
      Date:
        selector: data.release_date
        postProcess:
          - replace:
              # Cut everything from the first "T" on, leaving the date part
              - regex: T.*$
                with: ""
          - parseDate: 2006-01-02
      Studio:
        Name: data.studio.name
      FrontImage: data.front_cover
      BackImage: data.back_cover
# Extra HTTP headers sent with this scraper's requests.
# NOTE(review): the Authorization value is an API credential committed in
# plain text; confirm it is meant to be public and still valid.
driver:
  headers:
    - Key: User-Agent
      Value: stashjson/1.0.0
    - Key: Authorization # Beta key, enabled and active for now
      Value: Bearer 4vY0iwSUVPH5cGAX1AUZarJ8pbuDUK53
# Last Updated February 05, 2022

View File

@@ -0,0 +1,9 @@
id: AdultFilmIndex
name: AdultFilmIndex
metadata: {}
version: 8d19518
date: "2022-02-06 20:14:43"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AdultFilmIndex.yml

View File

@@ -0,0 +1,71 @@
# Routing for adultprime.com: name search, resolution of a picked search
# hit, and direct scraping of /studios/video pages.
name: "AdultPrime"
sceneByName:
  action: scrapeXPath
  # {} is replaced by the search term
  queryURL: "https://adultprime.com/studios/search?q={}"
  scraper: sceneSearch
sceneByQueryFragment:
  action: scrapeXPath
  queryURL: "{url}"
  scraper: sceneScraper
sceneByURL:
  - action: scrapeXPath
    url:
      - adultprime.com/studios/video
    scraper: sceneScraper
xPathScrapers:
  # Search results inside the #studio-videos-container list.
  sceneSearch:
    common:
      $scenes: //ul[@id="studio-videos-container"]
    scene:
      Title:
        selector: $scenes//span[contains(@class, "description-title")]/text()
      Date:
        selector: $scenes//span[@class="description-releasedate"]/text()
        postProcess:
          - parseDate: Jan 02, 2006
      URL:
        selector: $scenes//a[@class="absolute"]/@href
        postProcess:
          - replace:
              # Result links start with a signup/galleryId prefix (the dots
              # match any single character); swap it for the video page path
              - regex: "^.signup.galleryId."
                with: "https://adultprime.com/studios/video/"
      Image: $scenes//div[contains(@class, 'ap-slider-img')]/@data-src
sceneScraper:
common:
$scene: //div[@class="update-info-container"]
scene:
Title: $scene//h2
Date:
selector: //div[@id="theatre-row"]//span[@class="description-releasedate"]/text()
postProcess:
- parseDate: 02-01-2006
Details:
selector: $scene//p[contains(@class,"ap-limited-description-text")]
Image:
selector: //div[@id="theatre-row"]//video/@poster | //div[@id="theatre-row"]//div[contains(@class, "video-wrapper")]//div[contains(@style,"background-image:") and not(contains(@style,"player-placeholder.gif"))]/@style
postProcess:
- replace:
# Sometimes they put the URL in quotes, sometimes they just don't
# https://regex101.com/r/fszqAQ/3
- regex: .+url\(\"?(.+)\"?\).+
with: $1
URL: &url //link[@rel="canonical"]/@href
Code:
selector: *url
postProcess:
- replace:
- regex: \D+(\d+)$
with: $1
Studio:
Name: $scene//p[contains(@class,"update-info-line")]//a[contains(@href,"/studio/")]/text()
Tags:
Name:
selector: $scene//b[contains(.,"Niches")]/following-sibling::text()
split: ", "
Performers:
Name: $scene//b[contains(.,"Performer")]/following-sibling::a/text()
# Last Updated February 22, 2023

View File

@@ -0,0 +1,9 @@
id: AdultPrime
name: AdultPrime
metadata: {}
version: 3d8edf6
date: "2024-02-25 22:07:08"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AdultPrime.yml

View File

@@ -0,0 +1,134 @@
name: Raunchy Bastards
# Two page layouts exist across these sites; each group of domains is routed
# to the scraper written for its layout.
sceneByURL:
  - action: scrapeXPath
    url:
      - boundjocks.com/scene/
      - boyshalfwayhouse.com/scene/
      - coltstudiogroup.com/scene/
      - daddycarl.com/scene/
      - hotoldermale.com/scene/
      - monstercub.com/scene/
      - naturalbornbreeders.com/scene/
      - older4me.com/scene/
      - raunchybastards.com/scene/
      - stockydudes.com/scene/
      - toplatindaddies.com/scene/
    scraper: oldStyleSite
  - action: scrapeXPath
    url:
      - blackboyaddictionz.com/scene/
      - blacksondaddies.com/scene/
      - myfirstdaddy.com/scene/
      - playdaddy.com/scene/
    scraper: newStyleSite
xPathScrapers:
oldStyleSite:
common:
$scene: //div[contains(@class, "sceneContainer")]
scene:
Title: $scene/div[@class="sceneTitle"]
Code:
selector: $scene//div[contains(@class, "sceneImgBig")]/@id
postProcess:
- replace:
- regex: \D*
with: $1
Date:
selector: $scene//span[contains(@class, "sceneDetails")]
postProcess: &ppDate
- replace:
# https://regex101.com/r/rsjbb6/3
- regex: ^(?:Details:\s*)?(\w{3}\s*\d{1,2}),\s*(\d{4}).*?$
with: $1, $2
- parseDate: Jan 2, 2006
# All of this can be replaced once scrapers get access to the URL they are scraping
Studio:
Name:
selector: &image >
$scene//video/@poster
| $scene//div[contains(@class, "sceneImgBig")]/img/@src
| //div[contains(@style, "background-image")]/@style
| //*[contains(@class, "videoTrailer") or contains(@class, "bgScene")]//@srcset
postProcess: &studioNameFromURL
- replace:
- regex: ^(?:https:\/\/[\w\.]*?)([^.]+)\.com.*$
with: $1
- map:
blackboyaddictionz: Black Boy Addictionz
blacksondaddies: Blacks on Daddies
boundjocks: Bound Jocks
boyshalfwayhouse: Boys Halfway House
coltstudiogroup: Colt Studio Group
daddycarl: Daddy Carl
hotoldermale: Hot Older Male
monstercub: Monster Cub
myfirstdaddy: My First Daddy
naturalbornbreeders: Natural Born Breeders
older4me: Older4Me
playdaddy: Play Daddy
raunchybastards: Raunchy Bastards
stockydudees: Stocky dudes
toplatindaddies: Top Latin Daddies
URL:
selector: *image
postProcess:
- replace:
- regex: ^(?:https:\/\/[\w\.]*?)([^.]+)\.com.*$
with: https://$1.com
Performers:
Name: >
$scene//div[contains(@class, "scenePerformers")]/a
| $scene//div[@class="scenePerf"]/span[@class="perfName"]
URL: >
$scene//div[contains(@class, "scenePerformers")]/a/@href
| $scene//div[@class="scenePerf"]/@data-href
Tags:
Name: $scene//a[@class="sceneTagsLnk"]/text()
Details:
selector: $scene//div[contains(@class, "sceneDescription")]/text()
concat: "\n\n"
Image:
selector: *image
postProcess:
- replace:
- regex: .*url\("(.*)"\).*
with: $1
- regex: \s*2x$
with:
  # Scraper for the newer page layout. The *studioNameFromURL and *ppDate
  # aliases refer to anchors defined in oldStyleSite in this same document.
  newStyleSite:
    common:
      $details: //div[contains(@class, "container_styled_1")]
    scene:
      Title: //h2[@class="main_title"]
      Code:
        selector: //link[@rel="canonical"]/@href
        postProcess:
          - replace:
              # The pattern has no capture group, so this effectively deletes
              # the non-digit runs and leaves only the digits of the URL
              # NOTE(review): relies on "$1" expanding to nothing - confirm
              - regex: \D*
                with: $1
      # All of this can be replaced once scrapers get access to the URL they are scraping
      Studio:
        Name:
          selector: //link[@rel="canonical"]/@href
          postProcess: *studioNameFromURL
        URL:
          selector: //link[@rel="canonical"]/@href
          postProcess:
            - replace:
                # Reduce the canonical URL to the site's root URL
                - regex: ^(?:https:\/\/[\w\.]*?)([^.]+)\.com.*$
                  with: https://$1.com
      Performers:
        Name: $details//span[contains(@class, "perfImage")]/a
        URL: $details//span[contains(@class, "perfImage")]/a/@href
      Details:
        selector: $details//p/text()
        concat: "\n\n"
      Date:
        # First text node of the <h5> whose text contains "Details"
        selector: ($details//h5[contains(text(), "Details")]/text())[1]
        postProcess: *ppDate
      Image:
        selector: //meta[@property="og:image"]/@content
      Tags:
        Name: $details//h5[contains(., "Categories")]/a/text()
# Last Updated September 22, 2023

View File

@@ -0,0 +1,9 @@
id: AdultSiteRunner
name: Raunchy Bastards
metadata: {}
version: 0b5e6be
date: "2023-09-22 04:12:45"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AdultSiteRunner.yml

View File

@@ -0,0 +1,120 @@
# requires: Algolia
name: "Adultime"
# Video URLs on every listed site are handed to the shared Algolia script
# with the site key "girlsway".
sceneByURL:
  - action: script
    url:
      - accidentalgangbang.com/en/video/
      - adamandevepictures.com/en/video/
      - adulttime.com/en/video/
      - adulttimepilots.com/en/video/
      - agentredgirl.com/en/video/
      - analteenangels.com/en/video/
      - asmrfantasy.com/en/video/
      - assholefever.com/en/video/
      - beingtrans247.com/en/video/
      - blowmepov.com/en/video/
      - caughtfapping.com/en/video/
      - devilsfilm.com/en/video/
      - devilstgirls.com/en/video/
      - dpfanatics.com/en/video/
      - extremepickups.com/en/video/
      - famedigital.com/en/video/
      - footsiebabes.com/en/video/
      - forbiddenseductions.com/en/video/
      - girlstryanal.com/en/video/
      - givemeteens.com/en/video/
      - hairyundies.com/en/video/
      - isthisreal.com/en/video/
      - joymii.com/en/video/
      - kissmefuckme.com/en/video/
      - lezbebad.com/en/video/
      - lezcuties.com/en/video/
      - massage-parlor.com/en/video/
      - milkingtable.com/en/video/
      - mixedx.com/en/video/
      - modeltime.com/en/video/
      - moderndaysins.com/en/video/
      - mommysboy.com/en/video/
      - mommysgirl.com/en/video/
      - muses.com/en/video/
      - nudefightclub.com/en/video/
      - nurumassage.com/en/video/
      - officemsconduct.com/en/video/
      - oopsie.com/en/video/
      - outofthefamily.com/en/video/
      - peternorth.com/en/video/
      - prettydirty.com/en/video/
      - puretaboo.com/en/video/
      - sabiendemonia.com/en/video/
      - soapymassage.com/en/video/
      - teensneaks.com/en/video/
      - theyeslist.com/en/video/
      - transfixed.com/en/video/
      - transgressivefilms.com/en/video/
      - trickyspa.com/en/video/
      - truelesbian.com/en/video/
      - webyoung.com/en/video/
      - welikegirls.com/en/video/
      - wolfwagner.com/en/video/
    script:
      - python
      - ../Algolia/Algolia.py
      - girlsway
# Fragment and name hooks reuse the same script. Scene hooks pass the site
# key "girlsway", the gallery hook passes "puretaboo".
# NOTE(review): the two site keys differ - confirm this is deliberate.
sceneByFragment:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - girlsway
sceneByName:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - girlsway
    # NOTE(review): mode argument; presumably a free-text title search - see Algolia.py
    - searchName
sceneByQueryFragment:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - girlsway
    # NOTE(review): mode argument; meaning is defined in Algolia.py
    - validName
galleryByFragment:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - puretaboo
    # Mode argument used for the gallery hooks
    - gallery
# Photo-set URLs handled by the Algolia script in "gallery" mode. Every
# entry is a /en/photo/ path; video pages belong to sceneByURL.
galleryByURL:
  - action: script
    url:
      - accidentalgangbang.com/en/photo/
      - devilsfilm.com/en/photo/
      - joymii.com/en/photo/
      - mommysgirl.com/en/photo/
      - nurumassage.com/en/photo/
      - peternorth.com/en/photo/
      - prettydirty.com/en/photo/
      - puretaboo.com/en/photo/
      - transfixed.com/en/photo/
      - webyoung.com/en/photo/
    script:
      - python
      - ../Algolia/Algolia.py
      - puretaboo
      - gallery
# DVD pages handled by the Algolia script; the last argument is the mode
# passed for movie scraping.
movieByURL:
  - action: script
    url:
      - adulttime.com/en/dvd/
      - devilsfilm.com/en/dvd/
      - devilstgirls.com/en/dvd/
      - outofthefamily.com/en/dvd/
    script:
      - python
      - ../Algolia/Algolia.py
      - puretaboo
      - movie
# Last Updated October 12, 2023

View File

@@ -0,0 +1,9 @@
id: Adultime
name: Adultime
metadata: {}
version: e2b4ebb
date: "2024-01-31 07:58:20"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- Adultime.yml

View File

@@ -0,0 +1,55 @@
name: AkiraLaneProductions
# The three sites share one page layout under /tour/.
sceneByURL:
  - action: scrapeXPath
    url:
      - akiralane.com/tour/
      - akiralanebound.com/tour/
      - pantyhoselane.com/tour/
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      Title:
        selector: //h1
      Details: //div[@class="videocontent"]/p
      Image:
        # <base href> and the script that mentions "poster" are joined with
        # __SEP__; the regex glues the site origin to the src0_Nx path
        selector: //base/@href | //script[contains(.,'poster')]
        concat: __SEP__
        postProcess:
          - replace:
              - regex: ^.*(https://[^/]+).+src0_.x="([^"]+).+
                with: $1$2
      Date:
        selector: //div[@class="videodetails"]/p[@class="date"]
        postProcess:
          - replace:
              # Keep only the leading digits-and-slashes date
              - regex: "^([0-9/]+).*"
                with: $1
          - parseDate: 01/02/2006
      Tags:
        Name: //div[@class="videodetails"]//a[contains(@href,"categories")]/text()
      Performers:
        Name:
          # Model names are only present inside an HTML comment
          selector: //p[@class="modelname"]/comment()
          postProcess:
            - replace:
                # Try to extract model names from comment
                # https://regex101.com/r/IF0T90/1
                # The trailing group is non-capturing "(?:...)"; "(:?" would
                # be a capturing group with an optional literal colon.
                - regex: .*?>([A-Z][^<>]+).*?(?:</a>\s*</span>\s*-->)?
                  with: "$1|"
                # Default to Akira Lane if the comment is empty (it's hardcoded in the site as well)
                - regex: "<!-- -->"
                  with: "Akira Lane"
          split: "|"
      Studio:
        Name:
          selector: //base/@href
          postProcess:
            - replace:
                # Domain label without "www." and without the TLD
                - regex: ^.*https://(www\.)?([^\.]+).+
                  with: $2
            - map:
                akiralane: Akira Lane
                akiralanebound: Akira Lane Bound
                pantyhoselane: Pantyhose Lane
# Last Updated April 1, 2024

View File

@@ -0,0 +1,9 @@
id: AkiraLaneProductions
name: AkiraLaneProductions
metadata: {}
version: 1b2c30e
date: "2024-04-02 02:47:59"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AkiraLaneProductions.yml

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,9 @@
id: Algolia
name: Algolia
metadata: {}
version: 127fc12
date: "2024-03-16 21:24:34"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- Algolia.py

View File

@@ -0,0 +1,38 @@
# Scene scraper for allanalallthetime.com. Every field is read with XPath from
# the div with class "customcontent", except the image and the date.
name: allanalallthetime
sceneByURL:
- action: scrapeXPath
url:
- allanalallthetime.com
scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
Performers:
Name:
selector: //div[@class='customcontent']/h3/text()
# The <h3> text is split on commas into individual performer names.
split: ","
Title:
selector: //div[@class='customcontent']/h1/text()
Details:
# Union of the first <div> text and the <h2> text inside the content block.
selector: //div[@class='customcontent']/div[1]/text()|//div[@class='customcontent']/h2/text()
Tags:
Name:
selector: //div[@class='customcontent']/h4/text()
# The <h4> text is split on commas into individual tag names.
split: ","
Studio:
Name:
fixed: AllAnalAllTheTime
Image:
selector: //div[@class="content"]//@src
postProcess:
- replace:
# Prefix the scraped src value with the site origin.
- regex: ^
with: "https://www.allanalallthetime.com/"
Date:
selector: //div[@class='date-and-covers']/div[@class='date']/text()
postProcess:
- replace:
# Drop ordinal suffixes (1st, 2nd, 3rd, 4th) so the value matches the
# "January 2 2006" layout used by parseDate below.
- regex: (\d+)(st|nd|rd|th)
with: "$1"
- parseDate: January 2 2006
# Last Updated July 18, 2021

View File

@@ -0,0 +1,9 @@
id: AllAnalAllTheTime
name: allanalallthetime
metadata: {}
version: e502563
date: "2021-07-18 19:37:16"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AllAnalAllTheTime.yml

View File

@@ -0,0 +1,40 @@
# requires: Algolia
# Every action in this file delegates to the shared script
# ../Algolia/Algolia.py, always passing "allgirlmassage" as the first argument
# and, for some actions, one extra argument.
name: "AllGirlMassage"
# Scene pages: python ../Algolia/Algolia.py allgirlmassage
sceneByURL:
- action: script
url:
- allgirlmassage.com/en/video
script:
- python
- ../Algolia/Algolia.py
- allgirlmassage
# Fragment lookups use the same invocation as sceneByURL.
sceneByFragment:
action: script
script:
- python
- ../Algolia/Algolia.py
- allgirlmassage
# Name search: adds the "searchName" argument.
sceneByName:
action: script
script:
- python
- ../Algolia/Algolia.py
- allgirlmassage
- searchName
# Query-fragment lookups: adds the "validName" argument.
sceneByQueryFragment:
action: script
script:
- python
- ../Algolia/Algolia.py
- allgirlmassage
- validName
# Gallery (photo) pages: adds the "gallery" argument.
galleryByURL:
- action: script
url:
- allgirlmassage.com/en/photo/
script:
- python
- ../Algolia/Algolia.py
- allgirlmassage
- gallery
# Last Updated December 22, 2022

View File

@@ -0,0 +1,9 @@
id: AllGirlMassage
name: AllGirlMassage
metadata: {}
version: 97bec71
date: "2023-11-22 00:53:44"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AllGirlMassage.yml

View File

@@ -0,0 +1,33 @@
# Scene scraper for alterpic.com. The scene URL is rewritten into a call to
# the JSON API at alterpic.mymember.site and the fields are read from the
# JSON response.
name: Alterpic
sceneByURL:
- action: scrapeJson
url:
- alterpic.com/videos/
scraper: sceneScraper
queryURL: "https://alterpic.mymember.site/api/videos/{url}"
queryURLReplace:
url:
# Reduce the scene URL to the digits that follow "/videos/" and precede the
# "-"; that number replaces {url} in queryURL.
- regex: '.+/videos/(\d+)-.+'
with: "$1"
jsonScrapers:
sceneScraper:
scene:
Title: title
Details: description
Date:
selector: publish_date
postProcess:
- replace:
# If the value contains whitespace, keep only the part before it.
- regex: ^(\S+)\s.+
with: $1
- parseDate: 2006-01-02T15:04:05.000000Z
Image: poster_src
Performers:
# One performer per element of the "casts" array.
Name: casts.#.screen_name
Studio:
Name:
fixed: Alterpic
Tags:
# One tag per element of the "tags" array.
Name: tags.#.name
Code: id
# Last Updated January 20, 2024

View File

@@ -0,0 +1,9 @@
id: Alterpic
name: Alterpic
metadata: {}
version: 8b4518f
date: "2024-01-21 02:56:44"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- Alterpic.yml

View File

@@ -0,0 +1,66 @@
# Gallery and scene scraper for amateurallure.com and swallowsalon.com.
# The gallery scraper defines the selectors once and anchors them (&name);
# the scene scraper reuses them through aliases (*name) and adds an Image.
name: AmateurAllure
galleryByURL:
- action: scrapeXPath
url: &urls
- amateurallure.com/tour/scenes/
- swallowsalon.com/scenes/
scraper: galleryScraper
sceneByURL:
- action: scrapeXPath
# Same URL list as galleryByURL.
url: *urls
scraper: sceneScraper
xPathScrapers:
galleryScraper:
# Shared XPath fragments, referenced below as $sceneinfo, $title and $logo.
common: &commonAttr
$sceneinfo: //div[@class="scene-info"]
$title: //span[@class='title_bar_hilite']
$logo: //div[@id="logo"]//a/@href
gallery:
Title: &titleSel $title
Date: &dateAttr
selector: //div[@class="backgroundcolor_info"]//div[@class='cell update_date']
postProcess:
- parseDate: 01/02/2006
Details: &detailsSel //span[@class='update_description']
Tags: &tagsAttr
Name: //span[@class='update_tags']//a/text()
Performers: &performersAttr
Name: //div[@class='backgroundcolor_info']//span[@class='update_models']//a
Studio: &studioAttr
Name:
selector: $logo
postProcess:
- replace:
# Reduce the logo link to the host label that precedes ".com".
- regex: '[^.]+\.([^.]+)\.com/.*'
with: $1
- map:
amateurallure: Amateur Allure
swallowsalon: Swallow Salon
sceneScraper:
common: *commonAttr
scene:
Title: *titleSel
Date: *dateAttr
Details: *detailsSel
Tags: *tagsAttr
Performers: *performersAttr
Image:
# The logo link and the title are joined with "|" and rewritten into a
# search.php URL; the subScraper then loads that URL to find the thumbnail.
selector: $logo|$title
concat: "|"
postProcess:
- replace:
# Whitespace becomes "+" so the title can be used in the query string.
- regex: \s
with: "+"
# "<logo>|<title>" -> "<logo>/search.php?...&qex=<title>"
- regex: ^([^|]+)\|(.+)
with: "${1}/search.php?st=advanced&qall=&qany=&qex=$2"
- subScraper:
# On the search page, join the logo link and the img srcset values with "|".
selector: //div[@id="logo"]//a/@href|//img/@srcset
concat: "|"
postProcess:
- replace:
# amateurallure: logo link + the contentthumbs .jpg listed as "1920w".
- regex: ^([^|]+amateurallure[^|]+)\|.+(/content/contentthumbs/\d+/\d+/[^/]+\.jpg) 1920w
with: $1$2
# swallowsalon: logo link + the contentthumbs .jpg that follows "360w,".
- regex: ^([^|]+swallowsalon[^|]+)\|.+360w,\s((?:/tour)?/content/contentthumbs/\d+/\d+/[^/]+\.jpg).*
with: $1$2
Studio: *studioAttr
# Last Updated December 13, 2021

View File

@@ -0,0 +1,9 @@
id: AmateurAllure
name: AmateurAllure
metadata: {}
version: bc00323
date: "2021-12-13 21:54:41"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AmateurAllure.yml

View File

@@ -0,0 +1,27 @@
# Scene scraper for amateurboxxx.com. Each field is read from the scene page
# with a single XPath selector; the studio name is a fixed value.
name: AmateurBoxxx
sceneByURL:
- action: scrapeXPath
url:
- amateurboxxx.com
scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
Title:
selector: //span[@class="update_title"]
Date:
selector: //span[@class="availdate"]
postProcess:
# The scraped value is parsed with the month/day/year layout.
- parseDate: 01/02/2006
Performers:
Name: //span[@class="tour_update_models"]/a
Tags:
Name: //span[@class="update_tags"]/a
Details:
selector: //span[@class="latest_update_description"]
Image:
# Cover image comes from the Open Graph meta tag.
selector: //meta[@property="og:image"]/@content
Studio:
Name:
fixed: AmateurBoxxx
# Last Updated May 03, 2021

View File

@@ -0,0 +1,9 @@
id: AmateurBoxxx
name: AmateurBoxxx
metadata: {}
version: 66d4760
date: "2021-07-18 15:48:20"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AmateurBoxxx.yml

View File

@@ -0,0 +1,32 @@
# Scene scraper for amateure-xtreme.com, driven entirely by XPath selectors.
name: "AmateureXtreme"
sceneByURL:
- action: scrapeXPath
url:
- amateure-xtreme.com
scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
Title: //h1[@class='info-section--title']
URL: //link[@rel="canonical"]/@href
Date:
selector: //div[@class='posted-info meta-description small']/div[@class='float-left']/span[@class='text-nowrap']/span[@class='fa5-text']
postProcess:
- replace:
# Append "-01-01" to the scraped value.
# NOTE(review): presumably the page only shows a year - confirm.
- regex: $
with: -01-01
Tags:
Name: //a[contains(@href,'collections?')]
Performers:
Name:
selector: //span[@class='models']/span[@class='fa5-text']/a
Studio:
Name:
fixed: AmateureXtreme
Image:
selector: //script
postProcess:
- replace:
# From the script text, keep what follows `image":["` up to a "?".
- regex: .+image":\["(.+)\?.+
with: $1
# Last Updated September 13, 2022

View File

@@ -0,0 +1,9 @@
id: AmateureExtreme
name: AmateureXtreme
metadata: {}
version: e0aa84c
date: "2022-09-13 14:12:30"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AmateureExtreme.yml

View File

@@ -0,0 +1,37 @@
# Scene scraper for amelielou.com. Performer and studio are fixed values;
# the remaining fields are read from the scene page with XPath.
name: AmelieLou
sceneByURL:
- action: scrapeXPath
url:
- amelielou.com
scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
Title:
selector: //h1
Date:
# The date is the text node that follows the "Release date:" span.
selector: //span[contains(text(),"Release date:")]/following-sibling::text()
postProcess:
- parseDate: January 2, 2006
Performers:
Name:
fixed: Amelie Lou
Tags:
Name:
selector: //div[@class="item item-tags-change"]/a
postProcess:
- replace:
# Remove commas from the tag link text.
- regex: ","
with: ""
Details:
selector: //div[@class="item description-video"]
Image:
selector: //script[contains(text(),'getEmbed')]/text()
postProcess:
- replace:
# Extract a .jpg URL from the text of the getEmbed script.
- regex: .*?(https?:\/\/\S*\.jpg).*
with: $1
Studio:
Name:
fixed: AmelieLou
# Last Updated August 08, 2021

View File

@@ -0,0 +1,9 @@
id: AmelieLou
name: AmelieLou
metadata: {}
version: 1986e0c
date: "2021-08-08 13:01:04"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AmelieLou.yml

View File

@@ -0,0 +1,47 @@
# Gallery and scene scraper for amourangels.com cover pages. The gallery
# scraper defines each selector once and anchors it (&name); the scene
# scraper reuses them through aliases (*name) and adds an Image.
name: AmourAngels
galleryByURL:
  - action: scrapeXPath
    url: &urlAttr
      - amourangels.com/z_cover
    scraper: galleryScraper
sceneByURL:
  - action: scrapeXPath
    url: *urlAttr
    scraper: sceneScraper
xPathScrapers:
  galleryScraper:
    # Shared XPath fragments, referenced below as $table and $performer.
    common: &commonSel
      $table: //td[@class='blox-bg']/table
      $performer: //td[@class='modinfo']//p//table//a
    gallery:
      Title: &titleSel $table//b/text()
      Studio: &studioAttr
        Name:
          fixed: Amour Angels
      Date: &dateAttr
        selector: $table//p/br/following-sibling::text()[contains(.,"Added")]
        postProcess:
          - replace:
              # Strip the "Added " label. The replacement is written as an
              # explicit empty string rather than a bare `with:` (null).
              - regex: "Added "
                with: ""
          # Quoted so YAML cannot read the layout as a date value.
          - parseDate: "2006-01-02"
      Performers: &performersAttr
        Name: $performer/b/text()
        URL:
          selector: $performer/@href
          # Prefix the scraped href with the site origin; reused for Image.
          postProcess: &domainAttr
            - replace:
                - regex: ^
                  with: http://amourangels.com
  sceneScraper:
    common: *commonSel
    scene:
      Title: *titleSel
      Studio: *studioAttr
      Date: *dateAttr
      Performers: *performersAttr
      Image:
        selector: //td[@class="noisebg"]//img/@src
        postProcess: *domainAttr
# Last Updated October 21, 2021

View File

@@ -0,0 +1,9 @@
id: AmourAngels
name: AmourAngels
metadata: {}
version: a438e4e
date: "2021-10-25 20:23:49"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AmourAngels.yml

View File

@@ -0,0 +1,62 @@
import json
import sys
import re
from pathlib import Path
try:
import requests
except ModuleNotFoundError:
print("You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", file=sys.stderr)
print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", file=sys.stderr)
sys.exit()
def debug(t):
    """Write a diagnostic message to stderr, followed by a newline.

    The script prints its JSON result on stdout, so diagnostics are sent to
    stderr to keep that output clean.

    Args:
        t: The message. Non-string values are converted with ``str()``
            instead of raising ``TypeError`` on concatenation.
    """
    print(t, file=sys.stderr)
def query_url(query):
    """Look up ``query`` with the analvids.com autocomplete search API.

    Args:
        query: The search text (the caller passes a scene id).

    Returns:
        The first entry of the response's ``terms`` list, or ``None`` when
        the request fails, the response is not the expected JSON object, or
        there are no results.
    """
    try:
        # `params` URL-encodes the query; the timeout keeps an unresponsive
        # server from hanging the scraper indefinitely.
        res = requests.get(
            "https://www.analvids.com/api/autocomplete/search",
            params={"q": query},
            timeout=10,
        )
        res.raise_for_status()
        data = res.json()
    except (requests.RequestException, ValueError) as err:
        # Network failure, HTTP error status or a non-JSON body: report it
        # and let the caller treat it as "nothing found".
        debug(f"Search request for `{query}` failed: {err}")
        return None

    results = data.get("terms") if isinstance(data, dict) else None
    if not results:
        return None
    if len(results) > 1:
        debug("Multiple results. Taking first.")
    return results[0]
def detect_delimiter(title):
    """Return the word delimiter used in ``title``.

    Candidates are tried in a fixed priority order (space, underscore, dash,
    dot) and the first one contained in ``title`` wins.

    Returns:
        The delimiter character, or ``None`` (after logging a message) when
        none of the candidates occurs in ``title``.
    """
    delimiters = [" ", "_", "-", "."]
    for d in delimiters:
        if d in title:
            return d
    debug(f"Could not determine delimiter of `{title}`")
    return None


def _scan_for_scene_id(text):
    """Return the first delimiter-separated part of ``text`` that looks like a scene id."""
    text = text.replace("'", "")
    # A None delimiter makes str.split() fall back to splitting on whitespace.
    delimiter = detect_delimiter(text)
    for part in text.split(delimiter):
        if len(part) > 3 and re.match(r'^(\w{2,3}\d{3,4})$', part):
            if not part[0].isdigit() and part[-1].isdigit():
                return part
    return None


def find_scene_id(title):
    """Extract a scene id (2-3 word characters followed by 3-4 digits) from a title.

    Args:
        title: A file name or scene title; may be empty or ``None``.

    Returns:
        The first matching part, or ``None`` when nothing matches.
    """
    if not title:
        return None

    path = Path(title)
    # Remove file extension
    scene_id = _scan_for_scene_id(path.stem)
    if scene_id is None and path.suffix:
        # Path.stem cuts at the LAST dot, so for a title that is not a file
        # name (e.g. "Scene vol.2 GIO123") the id can sit in the part that
        # was treated as the extension. Retry with that part included.
        scene_id = _scan_for_scene_id(path.name)
    return scene_id
# Script entry point. With the "query" argument the scene fragment is read as
# JSON from stdin, enriched with the url/title of the matching scene when one
# is found, and printed back as JSON on stdout.
if len(sys.argv) > 1 and sys.argv[1] == "query":
    fragment = json.loads(sys.stdin.read())
    debug(json.dumps(fragment))
    # A fragment without a title must not crash the scraper.
    title = fragment.get("title")
    scene_id = find_scene_id(title) if title else None
    if not scene_id:
        debug(f"Could not determine scene id in title: `{title}`")
    else:
        debug(f"Found scene id: {scene_id}")
        result = query_url(scene_id)
        if result is None:
            debug("No scenes found")
        elif result.get("type") == "scene":
            name, url = result.get("name"), result.get("url")
            debug(f"Found scene {name}")
            # Only overwrite fields the search result actually provides.
            if url:
                fragment["url"] = url
            if name:
                fragment["title"] = name
        else:
            debug(f"First result is of type `{result.get('type')}`, not a scene; ignoring it")
    # The fragment is always echoed back, whether or not it was enriched.
    print(json.dumps(fragment))
else:
    # No argument used to raise IndexError; report the expected usage instead.
    debug("Nothing to do: expected the `query` argument")

View File

@@ -0,0 +1,60 @@
# yaml-language-server: $schema=../validator/scraper.schema.json
# Scraper for analvids.com and pissvids.com: scene and performer pages are
# scraped with XPath, and fragment lookups are delegated to AnalVids.py.
name: "AnalVids"
sceneByURL:
- action: scrapeXPath
url:
- analvids.com/watch/
- pissvids.com/watch/
scraper: sceneScraper
performerByURL:
- action: scrapeXPath
url:
- analvids.com/model/
- pissvids.com/model/
scraper: performerScraper
# Fragment lookups run `python AnalVids.py query`.
sceneByFragment:
action: script
script:
- python
# use python3 instead if needed
- AnalVids.py
- query
xPathScrapers:
sceneScraper:
common:
# Text nodes of the watch title heading that are not inside a <span>.
$title: //h1[contains(@class, "watch__title")]//text()[not(ancestor::span)]
scene:
Title:
selector: $title
# The matched text nodes are joined with single spaces.
concat: " "
Date:
selector: //i[contains(@class, "bi-calendar3")]/text()
postProcess:
- parseDate: 2006-01-02
Details:
# All description text except what sits directly inside a span with class "dots".
selector: //div[contains(@class, "text-mob-more")]//text()[not(parent::span[contains(@class, "dots")])]
concat: " "
Code:
selector: $title
postProcess:
- replace:
# Keep a trailing code of 2-3 capital letters followed by digits. When the
# title has no such ending, the second alternative matches everything and
# $1 is empty, which blanks the value.
- regex: .+?([A-Z]{2,3}\d+)$|(.+)
with: $1
Performers:
# Performers are the links inside the title heading.
Name: //h1[contains(@class, "watch__title")]//a/text()
URL: //h1[contains(@class, "watch__title")]//a/@href
Studio:
# Studio and tags both come from the "genres-list" block, told apart by link path.
Name: //div[contains(@class, "genres-list")]//a[contains(@href, "/studios/")]/text()
URL: //div[contains(@class, "genres-list")]//a[contains(@href, "/studios/")]/@href
Tags:
Name: //div[contains(@class, "genres-list")]//a[contains(@href, "/genre/")]/text()
Image: //video/@data-poster
performerScraper:
performer:
Name: //h1
Country: //a[contains(@href, "nationality")]
Image: //div[contains(@class, 'model__left')]//img/@src
# Last Updated August 16, 2023

View File

@@ -0,0 +1,10 @@
id: AnalVids
name: AnalVids
metadata: {}
version: fbd81c5
date: "2023-11-22 00:31:17"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AnalVids.yml
- AnalVids.py

View File

@@ -0,0 +1,298 @@
name: "Andomark"
sceneByURL:
- action: scrapeXPath
url:
- american-pornstar.com/updates/
- ariellynn.com/tour/updates/
- ashley4k.com/updates/
- behindtheporno.com/updates/
- bigboobiesclub.com/updates/
- bigbouncybabes.com/updates/
- bigtoyxxx.com/updates/
- bondagelegend.com/updates/
- bradsterling.elxcomplete.com/updates/
- britstudio.xxx/updates/
- brittanyandrewsxxx.com/updates/
- brittanysbubbles.com/updates/
- charlieforde.com/updates/
- chocolatepov.com/updates/
- collectivecorruption.com/updates/
- datingapphookup.com/updates/
- dirtroadwarriors.com/updates/
- furrychicks.elxcomplete.com/updates/
- hollyhotwife.elxcomplete.com/updates/
- houseofyre.com/updates/
- humiliation4k.com/updates/
- hungarianhoneys.com/tour/updates/
- internationalnudes.com/updates/
- johnnygoodluck.com/updates/
- justgoodsex.com/updates/
- justpov.com/tour/updates/
- lasvegasamateurs.com/tour/updates/
- mackmovies.com/updates/
- melaniehicksxxx.com/updates/
- nylons4k.com/updates/
- oldsexygrannies.com/updates/
- pawged.com/tour/updates/
- pawgnextdoor.com/tour/updates/
- playpetvictoria.com/updates/
- queercrush.com/updates/
- ravenswallowzxxx.com/updates/
- reidmylips.com/updates/
- reidmylips.elxcomplete.com/updates/
- rionkingxxx.com/updates/
- seanmichaelsxxx.com/updates/
- secretsusan.com/updates/
- sheseducedme.com/updates/
- sofiemariexxx.com/updates/
- tabooadventures.elxcomplete.com/updates/
- texaspattiusa.com/updates/
- thatfetishgirl.com/updates/
- tmfetish.com/updates/
- vanillapov.com/updates/
- willtilexxx.com/updates/
- xevunleashed.com/updates/
- xxxcellentadventures.com/updates/
- younggunsxxx.com/updates/
- yummybikinimodel.com/updates/
- yummygirl.com/updates/
- yummygirlz.com/updates/
- yummypornclub.com/updates/
- yummysexclub.com/updates/
- yummysofie.com/updates/
- yummywomen.com/updates/
scraper: sceneScraper
- action: scrapeXPath
url:
# Many sites have better metadata in the members access section, but these
# are the ones that ONLY provide the members access version of scene pages
- beatmeup.com/access/scenes/
- goonmuse.com/access/scenes/
- xevunleashed.com/access/scenes/
scraper: proSceneScraper
- action: scrapeXPath
url:
# These sites have a members access section that is not linked from their main page
# note that these pages often have more tags and a full description but no cover image
- ariellynn.com/access/scenes/
- ashley4k.com/access/scenes/
- behindtheporno.com/access/scenes/
- bigboobiesclub.com/access/scenes/
- bigbouncybabes.com/access/scenes/
- bigtoyxxx.com/access/scenes/
- bradsterling.elxcomplete.com/access/scenes/
- brittanyandrewsxxx.com/access/scenes/
- brittanysbubbles.com/access/scenes/
- chocolatepov.com/access/scenes/
- datingapphookup.com/access/scenes/
- dirtroadwarriors.com/access/scenes/
- furrychicks.elxcomplete.com/access/scenes/
- hollyhotwife.elxcomplete.com/access/scenes/
- houseofyre.com/access/scenes/
- humiliation4k.com/access/scenes/
# Hungarian Honeys has a members access section but it's not linked from the main page
# it also serves pages with status 500 so they're unscrapable right now, making a note
# here in case we ever rewrite to a script scraper
- hungarianhoneys.com/trailers/
- internationalnudes.com/access/scenes/
- johnnygoodluck.com/access/scenes/
- laurenphillips.com/access/scenes/
- mackmovies.com/access/scenes/
- melaniehicksxxx.com/access/scenes/
- nylons4k.com/access/scenes/
- oldsexygrannies.com/access/scenes/
- playpetvictoria.com/access/scenes/
- queercrush.com/access/scenes/
- ravenswallowzxxx.com/access/scenes/
- reidmylips.com/access/scenes/
- reidmylips.elxcomplete.com/access/scenes/
- rionkingxxx.com/access/scenes/
- seanmichaelsxxx.com/access/scenes/
- secretsusan.com/access/scenes/
- sheseducedme.com/access/scenes/
- sheseducedme.com/vod/scenes/
- sofiemariexxx.com/access/scenes/
- tabooadventures.elxcomplete.com/access/scenes/
- terapatrick.com/access/scenes/
- texaspattiusa.com/access/scenes/
- thatfetishgirl.com/access/scenes/
- tmfetish.com/access/scenes/
- vanillapov.com/access/scenes/
- willtilexxx.com/access/scenes/
- xxxcellentadventures.com/access/scenes/
- younggunsxxx.com/access/scenes/
- yummybikinimodel.com/access/scenes/
- yummygirl.com/access/scenes/
- yummygirlz.com/access/scenes/
- yummypornclub.com/access/scenes/
- yummysexclub.com/access/scenes/
- yummysofie.com/access/scenes/
- yummywomen.com/access/scenes/
scraper: proSceneWithImgFallbackScraper
xPathScrapers:
sceneScraper:
common:
$scene: //div[contains(@class, "update_block")]
$excludeUpdates: not(ancestor::*[contains(@class, "mpuSection")])
scene:
Title: $scene//span[@class="update_title"]/text()|//title/text()
Date:
# Some sites hide their release date in a comment
selector: >-
//*[(contains(@class, "availdate") or contains(@class, "update_date")) and contains(., "/")]
| //*[(contains(@class, "availdate") or contains(@class, "update_date"))]/comment()
| (//*[contains(@class,'fa-calendar')]/following-sibling::text())[1]
postProcess: &datePostProcess
- replace:
- regex: ".*?([0-9]{2}/[0-9]{2}/[0-9]{4}).*"
with: $1
- parseDate: 01/02/2006
Details: $scene//span[@class="latest_update_description"]
Performers:
Name: $scene//span[@class="tour_update_models" and $excludeUpdates]/a
Tags:
Name: $scene//span[contains(@class, "update_tags")]/a
Studio:
  Name:
    selector: //link[@rel="canonical"]/@href
    # Anchored so the members-area scrapers can reuse it via *studioPostProcess.
    postProcess: &studioPostProcess
      - replace:
          # Reduce the URL to its first host label, skipping an optional "www.".
          - regex: https://(w{3}?\.?)?(.+?)(\..+)
            with: $2
      # Translate the host label into the studio's display name.
      - map:
          ariellynn: Ariel Lynn
          ashley4k: Ashley 4K
          beatmeup: BeatMeUp
          behindtheporno: Behind The Porno
          bigboobiesclub: Big Boobies Club
          bigbouncybabes: Big Bouncy Babes
          bigtoyxxx: Big Toy XXX
          bondagelegend: Bondage Legend
          bradsterling: Brad Sterling
          britstudio: Brit Studio
          # Spelling follows the domain (was "Britttany Andrews").
          brittanyandrewsxxx: Brittany Andrews
          brittanysbubbles: Brittany Andrews
          charlieforde: Charlie Forde
          chocolatepov: ChocolatePOV
          collectivecorruption: Collective Corruption
          datingapphookup: Dating App Hook Ups
          dirtroadwarriors: Dirt Road Warriors
          furrychicks: Furry Chicks
          goonmuse: GoonMuse
          hollyhotwife: HollyHotWife
          houseofyre: House of Fyre
          humiliation4k: Humiliation 4K
          hungarianhoneys: Hungarian Honeys
          internationalnudes: International Nudes
          johnnygoodluck: Johnny Goodluck
          justgoodsex: JustGoodSex
          justpov: Just POV
          lasvegasamateurs: Las Vegas Amateurs
          laurenphillips: Lauren Phillips
          mackmovies: Mack Movies
          melaniehicksxxx: Melanie Hicks XXX
          nylons4k: Nylons 4K
          oldsexygrannies: OldSexyGrannies
          pawged: PAWGED
          pawgnextdoor: PAWG Next Door
          playpetvictoria: PlaypetVictoria
          queercrush: QueerCrush
          ravenswallowzxxx: Raven Swallows
          reidmylips: Reid My Lips
          rionkingxxx: Rion King
          seanmichaelsxxx: Sean Michaels
          secretsusan: Secret Susan
          sheseducedme: She Seduced Me
          sofiemariexxx: Sofie Marie XXX
          tabooadventures: Taboo Adventures
          # Spelling follows the domain (was "Terra Patrick").
          terapatrick: Tera Patrick
          texaspattiusa: TexasPattiUSA
          thatfetishgirl: ThatFetishGirl
          tmfetish: TMFetish
          vanillapov: VanillaPOV
          willtilexxx: Will Tile XXX
          xevunleashed: Xev Unleashed
          xxxcellentadventures: XXXcellentAdventures
          younggunsxxx: YoungGunsXXX
          yummybikinimodel: YummyBikini
          yummygirl: Yummygirl
          yummygirlz: Yummygirlz
          yummypornclub: YummyPornClub
          yummysexclub: Spik Irons' YummySexClub
          yummysofie: Yum Sofie
          yummywomen: Yummywomen
Image: &image
selector: //base/@href | //div[@class="update_image"]/a/img[@src0_1x]/@src0_1x | //span[@class="model_update_thumb"]/img/@src
concat: /
postProcess:
- replace:
- regex: ^(https://.+)?https://
with: https://
- regex: tour///
with: ""
URL: //link[@rel='canonical']/@href
# Scraper for members-area ("/access/scenes/") pages of the sites that only
# offer that page version. The date and studio post-processing are shared with
# sceneScraper through the *datePostProcess and *studioPostProcess aliases.
proSceneScraper:
common:
# We need to exclude anything inside a "category_listing_block" because those are suggested content and would give us false positives
$excludeUpdates: not(ancestor::*[contains(@class, "category_listing_block")])
scene:
Title: //div[@class="title_bar"]/span
Date:
# Only date elements that contain a "/" and are outside the suggested-content block.
selector: //div[contains(@class, "update_date") and $excludeUpdates and contains(., "/")]
postProcess: *datePostProcess
Details: //span[@class="update_description"]
Performers:
Name: //span[@class="update_models" and $excludeUpdates]/a
Tags:
Name: //span[@class="update_tags" and $excludeUpdates]/a
Studio:
Name:
# Here the studio is derived from the <base> href.
selector: //base/@href
postProcess: *studioPostProcess
Image:
# Join the <base> href and the text of the script that sets `useimage = "/..."`.
selector: //base/@href | //script[contains(text(), 'useimage = "/')]/text()
concat: __SEPARATOR__
postProcess:
- replace:
# If the image was empty we clear the URL
- regex: .*/$
with: ""
# Otherwise combine the site origin with the quoted useimage path.
- regex: (https://[^/]+).*useimage = "([^"]*).*
with: $1$2
# Same selectors as proSceneScraper for every field except Image, which is
# fetched from a second page through a subScraper.
proSceneWithImgFallbackScraper:
common:
# We need to exclude anything inside a "category_listing_block" because those are suggested content and would give us false positives
$excludeUpdates: not(ancestor::*[contains(@class, "category_listing_block")])
scene:
Title: //div[@class="title_bar"]/span
Date:
selector: //div[contains(@class, "update_date") and $excludeUpdates and contains(., "/")]
postProcess: *datePostProcess
Details: //span[@class="update_description"]
Performers:
Name: //span[@class="update_models" and $excludeUpdates]/a
Tags:
Name: //span[@class="update_tags" and $excludeUpdates]/a
Studio:
Name:
selector: //base/@href
postProcess: *studioPostProcess
Image:
# Start from an "option_button" link on the members page.
selector: //a[@class="option_button"]/@href
postProcess:
- replace:
# Drop the _vids/_caps/_highres suffix from the page name...
- regex: _(vids|caps|highres).html$
with: .html
# ...and swap the members path for the "/updates/" path.
- regex: /access/scenes/
with: /updates/
# Load the rewritten URL and apply the sceneScraper image definition (*image).
- subScraper: *image
driver:
cookies:
- CookieURL: https://sheseducedme.com
Cookies:
- Name: nats
Domain: sheseducedme.com
ValueRandom: 36
Path: /
# Last Updated April 1, 2024

View File

@@ -0,0 +1,9 @@
id: Andomark
name: Andomark
metadata: {}
version: 031c040
date: "2024-04-08 01:29:05"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- Andomark.yml

View File

@@ -0,0 +1,60 @@
# Scene scraper for angelawhite.com trailer pages. Title, date and details
# come from the trailer page itself; performers, tags and the image are
# fetched from the site's search page through subScrapers.
name: "AngelaWhite"
sceneByURL:
- action: scrapeXPath
url:
- angelawhite.com/tour/trailers/
scraper: sceneScraper
xPathScrapers:
sceneScraper:
common:
$info: //div[@class="video-details-inner"]
# The scene heading text, used below to build the search queries.
$search: //div[@class="video-details-inner"]/h2/text()
scene:
Title: $info/h2/text()
Date:
selector: $info/span/text()
postProcess:
- parseDate: Jan 2, 2006
Details: $info//p/text()
Performers:
Name:
selector: $search
postProcess:
- replace:
# When the heading starts with a number followed by whitespace, the whole
# value is replaced by a search URL for that number.
- regex: (^\d+)\s.+
with: "http://angelawhite.com/tour/search?query=$1"
- subScraper:
# Read the model links from the search page.
selector: //div[@class="models-list"]/a/text()
concat: ","
split: ","
Tags:
Name:
selector: $search
postProcess:
- replace:
# Same number-based search URL as for Performers.
- regex: (^\d+)\s.+
with: "http://angelawhite.com/tour/search?query=$1"
- subScraper:
# Read the category links from the search page.
selector: //div[@class="categories-list"]/a/text()
concat: ","
split: ","
Image:
selector: $search
postProcess:
- replace:
# Build a search URL for the full heading: whitespace becomes "+", and the
# text is wrapped in double quotes after the query= prefix.
- regex: \s
with: "+"
- regex: ^
with: "http://angelawhite.com/tour/search?query=\""
- regex: $
with: "\""
- subScraper:
selector: //img/@src0
postProcess:
- replace:
# Prefix the scraped src0 value with the site origin.
- regex: ^
with: http://angelawhite.com
Studio:
Name: //a[@class="logo-aw"]/@title
# Last Updated November 08, 2020

View File

@@ -0,0 +1,9 @@
id: AngelaWhite
name: AngelaWhite
metadata: {}
version: 66d4760
date: "2021-07-18 15:48:20"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AngelaWhite.yml

View File

@@ -0,0 +1,233 @@
name: AniDB
# ~~~~~~ GETTING STARTED ~~~~~~
# Store this file in the ~/stash/scrapers/AniDB.yml
# - If the scrapers directory is not there, you must create it first
#
# ~~~~~~ SETTING COOKIES ~~~~~~
# Note: I recommend creating a new account just for this scraper
# 1. Access the anidb.net website > login > right button > inspect > find cookies storage
# 2. Copy the "Value" of "adbsess" and "adbuin" and replace in the cookies category of this document
# 3. If your account is new, you need to access any NSFW anime and confirm that you want to see 18+ content
# 4. Do not change the order of the columns, as it can make it stop working
#
# ~~~~~~ SETTING USER AGENT ~~~~~~
# - Go to your Stash > Settings > Metadata Providers > Scraping > Scraper User Agent
# - Use the User Agent of your choice
# - For example: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0
#
# ~~~~~ RECOMMENDED WORKFLOW ~~~~~
# 1. Scrape scene by fragment (for performers, tags, artwork, etc)
# - If this fails, scrape by anime URL
# 2. Scrape by episode URL (for title, date)
# 3. Manually set movie scene number on scene page
# 3. Navigate to each performer's page & scrape by URL
# 4. Navigate to movie page & scrape by URL
#
# ~~~~~~ HOW TO USE (detailed) ~~~~~~
# tl;dr when in doubt, use the URL scrapers
# - For scenes: anidb.net/episode/XXX, anidb.net/anime/XXX
# - For performers: anidb.net/character/XXX
# - For movies: anidb.net/anime/XXX
#
# SCENES (by anime):
# - The Scraper by Fragment will usually work, assuming a filename like "[XX] My Lewd Anime - 01 (720p) (x264).mkv"
# - This regex expression strips underscores, dashes, content containing brackets and parentheses, and two digit numbers
# - For example, the above filename is stripped to "My Lewd Anime"
# - If this does not work, I recommend scraping with the episode URL, the anime URL, or the name scraper
# - By default, the scene scraper does not set the title, as the episode scraper serves this purpose better
# - However, if you'd like to enable this functionality, uncomment the "Title" line in sceneScraperAnime > scene
# - The scene (by anime) scraper automatically creates a new movie (i.e., series) entry,
# but unfortunately you will have to set the movie scene (i.e., episode) number manually
#
# SCENES (by episode):
# - This scraper is only accessible by scraping the episode URL (anidb.net/episode/XXX)
# - The scene episode scraper sets the episode title, the anime URL (if missing), and the original airing date
# - By default, the regex expression strips the episode number when setting the title
# - If you want to keep the episode number, delete the second regex replacement in
# sceneScraperEpisode > scene > Title > postProcess > replace
#
# MOVIES:
# - The scene (by anime) scraper automatically creates a new movie entry using the anime title and anime URL
# - On the movie page, you can scrape by URL
#
# PERFORMERS:
# - Performers need to be individually scraped by name or URL
# - I recommend creating them by scraping the anime URL, then navigating to the performer page.
# The performer URL should already be set, so you just need to press the scrape by URL button.
#
# ~~~~~ TROUBLESHOOTING ~~~~~
# - If you find that the scraper has suddenly stopped working, RESET YOUR COOKIES!
#
# ~~~~~ ANYTHING ELSE? ~~~~~
# THAT'S IT, ENJOY!
# Made by @escargotbuffed, further improvements by @symptom6186
performerByURL:
- action: scrapeXPath
url:
- https://anidb.net
scraper: performerScraper
performerByName:
action: scrapeXPath
queryURL: https://anidb.net/search/anime/?adb.search={}&entity.chartb=1
scraper: performerSearch
# Scrape a scene from its file name: the name is cleaned up by the
# replacements below and sent to the AniDB anime search.
sceneByFragment:
  action: scrapeXPath
  queryURL: https://anidb.net/anime/?adb.search={filename}
  queryURLReplace:
    filename:
      # Remove [...] and (...) groups, every two-digit run, and everything
      # from the first remaining dot onwards. The replacement is written as
      # an explicit empty string rather than a bare `with:` (null).
      - regex: '\[.*?\]|\(.*?\)|\d\d|\..*'
        with: ""
      # Dashes and underscores become spaces.
      - regex: '\-|\_'
        with: " "
      # Runs of whitespace are URL-encoded as a single %20.
      - regex: \s+
        with: "%20"
  scraper: sceneScraperAnime
sceneByURL:
- action: scrapeXPath
url:
- https://anidb.net/episode/
scraper: sceneScraperEpisode
- action: scrapeXPath
url:
- https://anidb.net/anime/
scraper: sceneScraperAnime
sceneByName:
action: scrapeXPath
queryURL: https://anidb.net/search/anime/?adb.search={}&entity.animetb=1
scraper: sceneSearch
sceneByQueryFragment:
action: scrapeXPath
queryURL: "{url}"
scraper: sceneScraperAnime
movieByURL:
- action: scrapeXPath
url:
- https://anidb.net/
scraper: sceneScraperAnime
xPathScrapers:
performerSearch:
performer:
Name: //td[@class="relid"]/a
URL:
selector: //td[@class="relid"]/a/@href
postProcess:
- replace:
- regex: ^
with: https://anidb.net
performerScraper:
common:
$info: //div[@class="g_section info"]
$tab_1_pane: //div[@class="g_section info"]//div[@id="tab_1_pane"]
$looks: //div[@class="g_section info"]//div[@id="tab_1_pane"]//tr[contains(@class, 'looks')]
performer:
Name: $tab_1_pane//tr[contains(@class, 'mainname')]//span[@itemprop="name"]
Aliases: $tab_1_pane//tr[contains(@class, 'official')]//label[@itemprop="alternateName"]
Disambiguation: $tab_1_pane//tr[contains(@class, 'mainname')]//a[@class='shortlink']
Gender: $tab_1_pane//tr[contains(@class, 'gender')]//span[@itemprop="gender"]
Ethnicity: $tab_1_pane//tr[contains(@class, 'entity')]//span[@class="tagname"]
HairColor: $looks//span[contains(@class, 'tagname') and contains(text(), 'hair')]
EyeColor: $looks//span[contains(@class, 'tagname') and contains(text(), 'eyes')]
Height: $tab_1_pane//tr[contains(@class, 'height')]//span[@itemprop="height"]
Weight: $tab_1_pane//tr[contains(@class, 'weight')]//span[@itemprop="weight"]
#Measurements: Todo
URL: //link[@rel="canonical"]/@href
Details:
selector: //div[@itemprop="description"]//text()
concat: "\n"
Tags:
Name: $tab_1_pane//span[@class="g_tag"]//span[@class="tagname"]
Image: $info//div[@class="image"]//img/@src
sceneSearch:
scene:
Title: //td[@class="relid"]/a
URL:
selector: //td[@class="relid"]/a/@href
postProcess:
- replace:
- regex: ^
with: https://anidb.net
Image: //td[@class="thumb anime"]//img/@src
# Scraper for episode pages (anidb.net/episode/...): sets the title, the URL
# (built from the anime tab link) and the date.
sceneScraperEpisode:
scene:
Title:
selector: //div[@id="layout-main"]//h1[@class="ep"]
postProcess:
- replace:
# Drop up to the first nine characters of the heading.
- regex: ^.{0,9}
with: ""
# Replace the "- <number> -" separator with "/" (this removes the episode number).
- regex: \- \d+ \-
with: "/"
URL:
# Link of the "anime" entry in the main tab bar.
selector: //ul[@class="main-tabs"]//li[@class="g_odd anime"]//span/a/@href
postProcess:
- replace:
# Prefix the scraped href with the site origin.
- regex: ^
with: https://anidb.net
Date: //div[@id="layout-main"]//span[@itemprop="datePublished"]/@content
sceneScraperAnime:
common:
$info: //div[@class="g_section info"]
$title: //div[@class="g_section info"]//div[@id="tab_1_pane"]//span[@itemprop="name"]
$en_title: //div[@class="g_section info"]//div[@id="tab_1_pane"]//tr[contains(@class, "official verified") and contains(.//span, 'en')]//label[@itemprop="alternateName"]
$character: //div[@id="characterlist"]//div[contains(@class, 'main character') or contains(@class, 'secondary cast')]//div[@itemprop="character"]
scene:
#Title: $en_title or $title
#Date:
# selector: $info//div[@id="tab_1_pane"]//span[contains(@itemprop, "datePublished") or contains(@itemprop, "startDate")]
# parseDate: 02.01.2006
Details:
selector: //div[@itemprop="description"]//text()
concat: " "
Tags:
Name: $info//div[@id="tab_1_pane"]//span[@class="tagname"]
Performers:
Name: $character/a/span
URL:
selector: $character/a/@href
postProcess:
- replace:
- regex: ^
with: https://anidb.net
Movies:
Name: $title
URL: //link[@rel="canonical"]/@href
Studio:
Name: $info//table[@id="staffoverview"]//tr[last()]/td[@class="name creator"]/a
Image: $info//div[@class="image"]//img/@src
URL: //link[@rel="canonical"]/@href
movie:
Name: $title
Aliases: $en_title
Date:
selector: $info//div[@id="tab_1_pane"]//span[contains(@itemprop, "datePublished") or contains(@itemprop, "startDate")]
postProcess:
- parseDate: 02.01.2006
Synopsis:
selector: //div[@itemprop="description"]//text()
concat: " "
Studio:
Name: $info//table[@id="staffoverview"]//tr[last()]/td[@class="name creator"]/a
FrontImage: $info//div[@class="image"]//img/@src
URL: //link[@rel="canonical"]/@href
driver:
cookies:
- CookieURL: "https://anidb.net/"
Cookies:
# Accessing adult content requires an AniDB account
# Replace value field
- Name: "adbsess"
Domain: "anidb.net"
Value: "" # Enter the value of the 'adbsess' here
Path: "/"
- Name: "adbuin"
Domain: "anidb.net"
Value: "" # Enter the value of the 'adbuin' here
Path: "/"
# Last Updated Dec 20, 2023

View File

@@ -0,0 +1,9 @@
# Manifest for the AniDB scraper: version, source index and bundled files.
id: AniDB
name: AniDB
metadata: {}
# Quoted so the short commit hash can never be read as a number.
version: "4f63378"
date: "2023-12-27 05:50:11"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AniDB.yml

View File

@@ -0,0 +1,129 @@
name: Anime-DB
# Name search: the percent-encoded search URL is handed to the site's
# black_curtain_redirect.php endpoint via its url parameter.
sceneByName:
  action: scrapeXPath
  queryURL: "https://adultanime.dbsearch.net/black_curtain_redirect.php?url=%2F%2Fadultanime.dbsearch.net%2Fsearch%2F%3Fkeyword%3D{}&check=true"
  scraper: sceneSearch
# Scrapes a scene picked from the search results through the same redirect
# endpoint that the name search uses.
sceneByQueryFragment:
  action: scrapeXPath
  queryURL: "https://adultanime.dbsearch.net/black_curtain_redirect.php?url={url}&check=true"
  queryURLReplace:
    url:
      # Drop the scheme; the explicit empty string replaces a bare `with:`,
      # which YAML reads as null.
      - regex: "https:"
        with: ""
      # Percent-encode slashes so the URL can be embedded as a query value.
      - regex: \/
        with: "%2F"
  scraper: sceneScraper
# Scrapes a scene from its page URL, routed through the redirect endpoint.
sceneByURL:
  - action: scrapeXPath
    url:
      - adultanime.dbsearch.net
    queryURL: "https://adultanime.dbsearch.net/black_curtain_redirect.php?url={url}&check=true"
    queryURLReplace:
      url:
        # Drop the scheme; the explicit empty string replaces a bare `with:`,
        # which YAML reads as null.
        - regex: "https:"
          with: ""
        # Percent-encode slashes so the URL can be embedded as a query value.
        - regex: \/
          with: "%2F"
    scraper: sceneScraper
xPathScrapers:
  # Maps the entries of a search result page.
  sceneSearch:
    scene:
      Title: //div[@class="item-info"]/h4/a/text()
      URL:
        selector: //div[@class="item-info"]/h4/a/@href
        postProcess:
          # Prefix the scraped href with the scheme.
          - replace:
              - regex: "^"
                with: "https:"
      Image:
        selector: //section[@class="item-box"]/div[@class="item-img"]/a/img/@data-src
        postProcess:
          # Prefix the scheme and rewrite the path to the "small" image variant.
          - replace:
              - regex: "^"
                with: "https:"
              - regex: "/basic/"
                with: "/small/"
              - regex: '\.jpg'
                with: "_s.jpg"
      Date:
        selector: //div[@class="item-info"]/p[@class="ndate"]/span/text()
        postProcess:
          # Turns the site's 2006年1月2日 format into 2006-01-02.
          - replace:
              # Year and month markers become dashes.
              - regex: "\u5E74|\u6708"
                with: "-"
              # The day marker is removed (explicit empty string instead of
              # a bare `with:`, which YAML reads as null).
              - regex: "\u65E5"
                with: ""
              # Zero-pad a single-digit month, then a single-digit day.
              - regex: -(\d)-
                with: -0$1-
              - regex: -(\d)$
                with: -0$1
      Studio:
        Name: //div[@class="item-info"]/p[@class="maker"]/a/text()
  # Maps a single item page.
  sceneScraper:
    scene:
      Title: //meta[@property="og:title"]/@content
      URL: //meta[@property="og:url"]/@content
      Details:
        selector: //section[@class="iteminfo-box"]/blockquote/p[@class="pq"]//text()
        concat: "\n\n"
      Code: //dt[text()="規格品番"]/following-sibling::dd[1]/p/text()
      Image: //section[@id="sample-image"]/img/@data-src
      Tags:
        Name:
          selector: //nav[@id="tag-list"]/ul/li/a/text()
          postProcess:
            - map: # remove all 作品形式 (format) tags
                DVD: ""
                オリジナルアニメ作品: ""
                PCゲーム原作アニメ: ""
                コミック原作アニメ: ""
                ライトノベル・ノベル原作アニメ: ""
                同人原作アニメ: ""
                アダルトコミック原作アニメ: ""
                ボーイズラブアニメ作品: ""
                廉価版アニメ: ""
                BD-BOX・DVD-BOX: ""
                3D: ""
                RPG: ""
                アクション: ""
                麻雀・テーブルゲーム: ""
                3Dポリゴン: ""
                廉価版・新装版: ""
                萌えゲーアワード受賞: ""
                4時間以上作品: ""
                ベスト・総集編: ""
                サンプル動画: ""
                アドベンチャー: ""
                シミュレーション: ""
                Blu-rayブルーレイ: ""
                DVDPG: ""
                UMD: ""
                VFT: ""
                フルボイス: ""
                廉価版: ""
                BDPG: ""
                売り尽くしセール: "" # other unneeded tags
                Yahooコメント掲載禁止: ""
                アニメ: ""
                特典付き・セット商品: ""
      Studio:
        Name: //dt[text()="レーベル"]/following-sibling::dd[1]/p/text()
      Date:
        selector: //dt[text()="発売日"]/following-sibling::dd[1]/p/text()
        postProcess:
          # Turns the site's 2006年1月2日 format into 2006-01-02.
          - replace:
              # Year and month markers become dashes.
              - regex: "\u5E74|\u6708"
                with: "-"
              # The day marker is removed (explicit empty string instead of
              # a bare `with:`, which YAML reads as null).
              - regex: "\u65E5"
                with: ""
              # Zero-pad a single-digit month, then a single-digit day.
              - regex: -(\d)-
                with: -0$1-
              - regex: -(\d)$
                with: -0$1
driver:
  # CDP is needed for the age confirmation redirect; cookies only work
  # temporarily.
  useCDP: true
# Last Updated January 22, 2023

View File

@@ -0,0 +1,9 @@
# Manifest for the Anime-DB scraper: version, source index and bundled files.
id: Anime-DB
name: Anime-DB
metadata: {}
# Quoted so the short commit hash can never be read as a number.
version: "2c5a687"
date: "2023-02-25 23:42:56"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- Anime-DB.yml

View File

@@ -0,0 +1,267 @@
import json
import os
import re
import sys
from datetime import datetime
# to import from a parent directory we need to add that directory to the system path
csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory
parent = os.path.dirname(csd) # parent directory (should be the scrapers one)
sys.path.append(
parent
) # add parent dir to sys path so that we can import py_common from there
# Third-party dependencies. When one is missing, print install instructions to
# stderr and stop with a non-zero exit status so the failure is reported as an
# error (a bare sys.exit() would signal success).
try:
    import cloudscraper
except ModuleNotFoundError:
    print("You need to install the cloudscraper module. (https://pypi.org/project/cloudscraper/)", file=sys.stderr)
    print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install cloudscraper", file=sys.stderr)
    sys.exit(1)
try:
    import requests
except ModuleNotFoundError:
    print("You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", file=sys.stderr)
    print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", file=sys.stderr)
    sys.exit(1)
try:
    from lxml import html
except ModuleNotFoundError:
    print("You need to install the lxml module. (https://lxml.de/installation.html#installation)", file=sys.stderr)
    print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install lxml", file=sys.stderr)
    sys.exit(1)
try:
import py_common.log as log
except ModuleNotFoundError:
print(
"You need to download the folder 'py_common' from the community repo (CommunityScrapers/tree/master/scrapers/py_common)",
file=sys.stderr)
sys.exit(1)
# --------------------------------------
# This is a scraper for: animecharactersdatabase.com
#
# AnimeCharactersDatabase includes characters from:
# Anime, Hentai, (Mobile) Games, Eroge, Virtual Idols/YouTubers, Vocaloid
#
# These fields will be populated if available:
# Name, Gender, Birthdate, Country, Hair Color, Eye Color, Height, Measurements, URL, Details, Tags, Image
#
# A number of additional tags can be configured below.
# ---------------------------------------
# ---------- Tag Configuration ----------
# ---------------------------------------
# Maximum number of search results (between 1 and 30).
# Search by name includes the franchise for each result to make it easier to choose the correct one.
# Some (non ascii, very short) names require querying the API individually to get the franchise for each result.
# This might get you banned, since the API is rate limited.
# See: http://wiki.animecharactersdatabase.com/index.php?title=API_Access
# Upper bound on the number of name-search results that are returned.
limit = 15
# Common prefix for performer tags; the more specific prefixes below build on it.
prefix = "performer:"
# Tags attached to every scraped performer (the trailing "[]" is the value to use for none).
additional_tags = [{"name": "fictional"}] # []
# Tags mostly include appearance indicators like: ahoge, dress, hat, twintails, etc.
include_tag = True
tag_prefix = prefix
# Scrape the source material as tag (name of anime/game): Kantai Collection, Idolmaster: Cinderella Girls, etc.
include_parody = True
parody_prefix = "parody:"
# Scrape Zodiac Sign as tag: Libra ♎, Sagittarius ♐, etc.
include_sign = True
sign_prefix = prefix + "sign:"
# Scrape race of non-human characters as tag: Orc, Elf, etc.
include_race = True
race_prefix = prefix + "race:"
# Scrape ship class of ship girls as tag (kancolle, etc.): Destroyer, etc.
include_ship_class = True
ship_class_prefix = prefix + "ship:"
# Scrape blood type as tag: A, B, etc.
include_blood_type = True
blood_type_prefix = prefix + "Blood Type "
# Scrape apparent age as tag: Adult, Teen, etc.
# Apparent age might differ from canonical age.
# Canonical age is ignored, since it would result in too many tags.
# Birthdate is sometimes available, but an age calculated from it represents neither canonical nor apparent age.
include_apparent_age = True
apparent_age_prefix = prefix + "Apparent "
# Scrape Hair Length as tag: To Shoulders, To Neck, Past Waist, etc.
include_hair_length = True
hair_length_prefix = prefix + "Hair "
# ---------------------------------------
# ---------------------------------------
# ---------------------------------------
def readJSONInput():
    """Decode the JSON document supplied on standard input and return it."""
    raw_payload = sys.stdin.read()
    return json.loads(raw_payload)
def scrapeURL(url):
    """Download ``url`` and return the page parsed into an lxml HTML tree."""
    page_source = scrapeUrlToString(url)
    return html.fromstring(page_source)
def scrapeUrlToString(url):
    """Fetch ``url`` through cloudscraper and return the raw response body.

    Logs an error and exits the script with status 1 when the request raises
    or when the server answers with an HTTP status of 400 or above.
    """
    scraper = cloudscraper.create_scraper()
    try:
        scraped = scraper.get(url)
    except Exception as e:
        # Catch Exception, not everything, so KeyboardInterrupt/SystemExit
        # still propagate; include the cause to make failures diagnosable.
        log.error(f"scrape error: {e}")
        sys.exit(1)
    if scraped.status_code >= 400:
        log.error('HTTP Error: %s' % scraped.status_code)
        sys.exit(1)
    return scraped.content
def performerByName(query):
    """Search the site for characters matching ``query``.

    Returns a list of dicts with the keys "name", "id" and "url", one per
    result tile found on the search page.
    """
    encoded_query = requests.utils.quote(query)
    url = f"https://www.animecharactersdatabase.com/searchall.php?in=characters&sq={encoded_query}"
    tree = scrapeURL(url)
    tile_names = tree.xpath("//li/div[@class='tile3top']/a/text()")
    tile_links = tree.xpath("//li/div[@class='tile3top']/a/@href")
    results = [
        {
            "name": tile_name,
            # The link has the form "characters.php?id=<id>"; keep only the id.
            "id": tile_link.replace("characters.php?id=", ""),
            "url": "https://www.animecharactersdatabase.com/" + tile_link,
        }
        for tile_name, tile_link in zip(tile_names, tile_links)
    ]
    log.info(f"scraped {len(results)} results on: {url}")
    return results
def addFranchise(query, results):
    """Append the franchise to the name of every search result.

    Args:
        query: The name that was searched for.
        results: Result dicts as produced by performerByName; modified in
            place (the "name" is extended and the "id" key is removed).

    Returns:
        The same ``results`` list.
    """
    cleanedQuery = requests.utils.quote(query)
    url = f"https://www.animecharactersdatabase.com/api_series_characters.php?character_q={cleanedQuery}"
    data = json.loads(scrapeUrlToString(url))
    count1 = 0
    count2 = 0
    for result in results:
        try:
            # Try to find the franchise in API search results.
            # These results are ordered by alphabet and limited to 100,
            # so short queries might not include the correct result.
            # The API query also does not seem to support any Kanji.
            franchise = next(e["anime_name"] for e in data["search_results"] if str(e["id"]) == result["id"])
            count1 += 1
        except Exception:
            # No match (or an unexpected response shape): use separate API
            # calls as a backup. Catching Exception instead of everything
            # keeps KeyboardInterrupt/SystemExit from triggering more requests.
            # This might get you banned, since the API is rate limited.
            franchise = apiGetCharacter(result["id"])["origin"]
            count2 += 1
        # Append franchise to character name for easier differentiation.
        result["name"] = f"{result['name']} ({franchise})"
        result.pop("id")
    log.debug(f"scraped {count1} franchises by single API call")
    log.debug(f"scraped {count2} franchises by separate API calls")
    return results
def apiGetCharacter(id):
    """Query the character API for ``id`` and return the decoded JSON reply."""
    endpoint = f"https://www.animecharactersdatabase.com/api_series_characters.php?character_id={id}"
    response_body = scrapeUrlToString(endpoint)
    return json.loads(response_body)
def performerByURL(url, result=None):
    """Scrape a character page and return its data as a performer dict.

    Args:
        url: Address of the character page to scrape.
        result: Optional dict to fill in; a new dict is created when omitted.

    Returns:
        ``result`` with url, name, details, image, tags, country, height,
        gender, eye_color and hair_color set, plus birthdate and measurements
        when the page provides the values needed to build them.
    """
    # Create the dict per call: a mutable default would be shared between calls.
    if result is None:
        result = {}
    log.debug("performerByURL: " + url)
    tree = scrapeURL(url)
    result["url"] = url
    result["name"] = next(iter(tree.xpath(
        "//h3[@id='section001_summary']/following-sibling::p/a[contains(@href,'character')]/text()")), "").strip()
    result["details"] = "\n".join([s.strip() for s in tree.xpath(
        "//div[@style='padding: 0 15px 15px 15px; text-align: left;']/text()")])
    if not result["details"]:
        # Fall back to the summary paragraph. Joining its text nodes leaves a
        # space before the final period; the dot is escaped so that only a
        # literal " ." is collapsed and a trailing word or digit is kept.
        result["details"] = re.sub(r" \.$", ".", " ".join([s.strip() for s in tree.xpath(
            "//h3[@id='section001_summary']/following-sibling::p[contains(a/@href,'character')]//text()") if
            s.strip()]))
    result["image"] = next(iter(tree.xpath("//meta[@property='og:image']/@content")), "")

    # left table, works for link and plain text fields, return result list
    def parse_left(field):
        template = "//table//th[text()='{0}' or a/text()='{0}']/following-sibling::td/a/text()"
        return tree.xpath(template.format(field))

    # Copy the configured list so appending page tags never mutates the
    # module-level setting.
    result["tags"] = list(additional_tags)
    # (enabled, prefix, table header) for every tag group, in output order.
    tag_sources = (
        (include_tag, tag_prefix, "Tags "),
        (include_parody, parody_prefix, "From"),
        (include_blood_type, blood_type_prefix, "Blood Type"),
        (include_race, race_prefix, "Race"),
        (include_sign, sign_prefix, "Sign"),
        (include_ship_class, ship_class_prefix, "Ship Class"),
    )
    for enabled, label_prefix, field in tag_sources:
        if enabled:
            result["tags"] += [{"name": label_prefix + tag.strip()} for tag in parse_left(field)]

    result["country"] = next(iter(parse_left("Nationality")), "")

    birthday = parse_left("Birthday")
    birthyear = parse_left("Birthyear")
    if birthday and birthyear:
        try:
            # Parse month/day against a leap year so "February 29" is accepted
            # (strptime's default year, 1900, is not a leap year), then apply
            # the real birth year.
            month_day = datetime.strptime(birthday[0].strip() + " 2000", "%B %d %Y")
            birthdate = month_day.replace(year=int(birthyear[0].strip()))
        except ValueError as e:
            # Unparseable or impossible date: skip the field instead of
            # aborting the whole scrape.
            log.debug(f"could not parse birthdate: {e}")
        else:
            result["birthdate"] = birthdate.strftime("%Y-%m-%d")

    bust = parse_left("Bust")
    waist = parse_left("Waist")
    hip = parse_left("Hip")
    if bust and waist and hip:
        bust = bust[0].strip().replace("cm", "")
        waist = waist[0].strip().replace("cm", "")
        hip = hip[0].strip().replace("cm", "")
        result["measurements"] = "{}-{}-{}".format(bust, waist, hip)
    result["height"] = next(iter(parse_left("Height")), "").strip().replace("cm", "")

    # middle/right table, reverse result list to prefer official appearance, return result or empty string
    def parse_right(field):
        template = "//table//th[text()='{}']/following-sibling::td/text()"
        return next(reversed(tree.xpath(template.format(field))), "").strip().replace("Unknown", "")

    # should be tagged anyway if yes
    # if parse_right("Animal Ears") == "Yes":
    #     result["tags"] += [{"name": "performer:animal ears"}]
    hair_length = parse_right("Hair Length")
    if include_hair_length and hair_length:
        result["tags"] += [{"name": hair_length_prefix + hair_length}]
    apparent_age = parse_right("Apparent Age")
    if include_apparent_age and apparent_age:
        result["tags"] += [{"name": apparent_age_prefix + apparent_age}]
    result["gender"] = parse_right("Gender")
    result["eye_color"] = parse_right("Eye Color")
    result["hair_color"] = parse_right("Hair Color")
    return result
# Entry point: the JSON payload arrives on stdin and the operation to run is
# given as the first command line argument; the outcome is printed as JSON.
i = readJSONInput()
mode = sys.argv[1]
if mode == "performerByURL":
    url = i["url"]
    result = performerByURL(url)
    print(json.dumps(result))
elif mode == "performerByName":
    name = i["name"]
    log.info(f"Searching for name: {name}")
    # Trim to the configured maximum before the franchise lookups.
    results = performerByName(name)[:limit]
    results = addFranchise(name, results)
    print(json.dumps(results))

View File

@@ -0,0 +1,20 @@
name: AnimeCharactersDatabase
# requires: py_common
# Both actions run AnimeCharactersDatabase.py; the last script argument selects
# the operation the script performs.
performerByURL:
  - action: script
    url:
      - animecharactersdatabase.com/characters.php
    script:
      - python
      - AnimeCharactersDatabase.py
      - performerByURL
performerByName:
  action: script
  script:
    - python
    - AnimeCharactersDatabase.py
    - performerByName
# Last Updated January 23, 2022

View File

@@ -0,0 +1,10 @@
# Manifest for the AnimeCharactersDatabase scraper: version, source index and
# bundled files.
id: AnimeCharactersDatabase
name: AnimeCharactersDatabase
metadata: {}
# Quoted so the short commit hash can never be read as a number.
version: "4fcb313"
date: "2023-12-19 22:39:58"
# NOTE(review): the scraper definition mentions a py_common requirement, yet
# this list is empty - confirm whether that is intended.
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AnimeCharactersDatabase.yml
- AnimeCharactersDatabase.py

View File

@@ -0,0 +1,67 @@
name: AntonioSuleiman.com
# Scene pages are scraped directly from their URL.
sceneByURL:
  - action: scrapeXPath
    url:
      - antoniosuleiman.com
    scraper: sceneScraper
# Name search goes through the site's own search page.
sceneByName:
  action: scrapeXPath
  queryURL: https://antoniosuleiman.com/search.php?query={}
  scraper: sceneSearch
# A chosen search result is scraped from the URL it carries.
sceneByQueryFragment:
  action: scrapeXPath
  queryURL: "{url}"
  scraper: sceneScraper
xPathScrapers:
  # Maps a single scene page.
  sceneScraper:
    common:
      $scene: //div[contains(@class, "updatesBlock")]
      $image: (//div[contains(@class, "updatesBlock")]//img)[1]
    scene:
      Title: $scene//h3
      Date:
        selector: ($scene//div[contains(@class, "updateDetails")]//p)[1]
        postProcess:
          - parseDate: 2006-01-02
      Details: $scene/div[@class="wrapper"]/*[last()]
      URL: //link[@rel="canonical"]/@href
      Tags:
        Name:
          # The worst way to do tags but it's all they have
          selector: //meta[@name="keywords"]/@content
          split: ","
      Performers:
        Name: $scene//*[contains(@class,"tour_update_models")]//a
      # XPath union over the src0_4x .. src0_1x image attributes.
      Image: >-
        $image/@src0_4x |
        $image/@src0_3x |
        $image/@src0_2x |
        $image/@src0_1x
      Studio:
        Name:
          fixed: Antonio Suleiman
  # Maps the entries of a search result page.
  sceneSearch:
    common:
      $scene: //div[@data-url]
    scene:
      Title: $scene//p[@class="left-first-paragraph"]
      URL: $scene/@data-url
      # Search doesn't return any description but we can show the performers instead
      Details: //div[@data-url]//p[@class="left-second-paragraph"]
      Image: $scene//img/@src0_1x
      Date: $scene//p[@class="right-paragraph" and not(span)]
driver:
  cookies:
    - CookieURL: "https://antoniosuleiman.com"
      Cookies:
        - Name: "PHPSESSID"
          Domain: ".antoniosuleiman.com"
          # Hard-coded session id; unsure about the duration of this cookie
          Value: ovejq7d8cfhoc99q1jrn265af8
          Path: "/"
        - Name: "lang"
          Domain: ".antoniosuleiman.com"
          # 0 is English, 1 is Arabic
          Value: "0"
          Path: "/"
# Last Updated November 20, 2023

View File

@@ -0,0 +1,9 @@
# Manifest for the AntonioSuleiman.com scraper: version, source index and
# bundled files.
id: AntonioSuleiman
name: AntonioSuleiman.com
metadata: {}
# Quoted so the short commit hash can never be read as a number.
version: "67a250d"
date: "2023-11-21 23:41:38"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- AntonioSuleiman.yml

View File

@@ -0,0 +1,34 @@
name: "Arch Angel Video"
sceneByURL:
  - action: scrapeXPath
    url:
      - archangelvideo.com/tour/trailers
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    common:
      # Container that most of the selectors below are scoped to.
      $videoContent: //div[@class="midwrap clear"]
    scene:
      Title: $videoContent//div[@class='title clear']/h2
      Details: //div[@class='description']/p
      Date:
        selector: $videoContent//div[@class='info']/p/text()[contains(.,"Added:")]
        postProcess:
          # Strip the "Added: " label, then parse e.g. "January 2, 2006".
          - replace:
              - regex: "Added: "
                with: ""
          - parseDate: January 2, 2006
      Tags:
        Name: //ul[@class='tags']/li/a
      Studio:
        Name:
          fixed: ArchAngel
      Image:
        selector: $videoContent//img/@src
        postProcess:
          # The scraped src gets the site origin prepended.
          - replace:
              - regex: ^
                with: "https://archangelvideo.com"
      Performers:
        Name: $videoContent//div[@class='info']/p[1]/a
# Last Updated March 07, 2022

Some files were not shown because too many files have changed in this diff Show More