stash

2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions
--- a/stash/config/scrapers/community/IFeelMyself/IFeelMyself.py
+++ b/stash/config/scrapers/community/IFeelMyself/IFeelMyself.py
@@ -0,0 +1,298 @@
+import json
+import re
+import sys
+from datetime import datetime
+import unicodedata
+
+# Unless you are logged in (and probably have an active subscription), scenes with certain tags (menstruation, pee) are hidden and cannot be found by the scraper.
+# The performer scraper also cannot get the country and details without being logged in.
+# Set the value of the ifeel_auth cookie here; it may change and need to be renewed periodically.
+# If no account is available, leave the value empty: the scraper won't find some videos, and the country and details fields will be missing from performer scrapes.
+
+ifeelauth = ""
+
+try:
+    from mechanicalsoup import StatefulBrowser
+except ModuleNotFoundError:
+    print("You need to install the mechanicalsoup module. (https://mechanicalsoup.readthedocs.io/en/stable/introduction.html#installation)", file=sys.stderr)
+    print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install MechanicalSoup", file=sys.stderr)
+    sys.exit()
+
+try:
+    from requests.cookies import create_cookie
+except ModuleNotFoundError:
+    print("You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", file=sys.stderr)
+    print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", file=sys.stderr)
+    sys.exit()
+
+
+def readJSONInput():
+    """Read all of standard input and return it parsed as JSON."""
+    raw_payload = sys.stdin.read()
+    return json.loads(raw_payload)
+
+def extract_SceneInfo(table,cover_url=None):
+    """Build the scene dictionary for a single scene entry.
+
+    ``table`` is the BeautifulSoup element of one scene entry. ``cover_url``
+    is an optional, already known cover image URL; when it is omitted the
+    entry's ``<img src>`` is used, falling back to its ``<video poster>``.
+
+    Returns a dict with the keys ``title``, ``performers``, ``studio``,
+    ``tags``, ``date``, ``image``, ``details`` and ``url``. ``details`` is
+    ``None`` when the entry has no description, and ``url`` is ``None`` when
+    the media/artist ids cannot be read from the cover URL.
+    """
+    description = None
+    blurb = table.find(class_=["blog_wide_new_text","entryBlurb"])
+    if blurb is not None:
+        description = blurb.get_text(" ", strip=True)
+        # Fold compatibility characters first, then drop anything still non-ASCII.
+        description = unicodedata.normalize('NFKC', description).encode('ascii','ignore').decode('ascii')
+
+    # Per the original note: new scenes carry the date under blog-title-right,
+    # older videos under entryDatestamp.
+    date = table.find(class_=["blog-title-right","entryDatestamp"]).get_text(strip=True)
+    date = datetime.strptime(date, '%d %b %Y').date().strftime('%Y-%m-%d')  # ISO format
+
+    # The first link of the heading is the title, the second one the performer.
+    heading = table.find(class_=["entryHeadingFlash","entryHeading"])
+    performer = str(heading.find_all("a")[1].get_text().replace("_"," "))
+    debugPrint(f"performer:{performer}")
+    # "\x92" is presumably a mis-decoded apostrophe -- TODO confirm.
+    title = heading.find('a').get_text().replace("\x92","'")
+
+    if cover_url is None:
+        image = table.find("img")
+        if image is not None:
+            cover_url = str(image['src'])
+        else:
+            cover_url = str(table.find("video")['poster'])
+
+    # Both ids are embedded in the cover URL; without them no scene URL can be built.
+    media_match = re.search(r"\/(\d{3,5})\/",cover_url,re.I)
+    artist_match = re.search(r"\/(f\d{4,5})",cover_url,re.I)
+    scene_url = None
+    if media_match and artist_match:
+        scene_url = "https://ifeelmyself.com/public/main.php?page=flash_player&out=bkg&media_id="+media_match.group(1)+"&artist_id="+artist_match.group(1)
+    else:
+        debugPrint(f"Could not read media/artist id from cover url: {cover_url}")
+
+    tag_list = [{"name": tag.get_text()} for tag in table.find_all(class_="tags-list-item-tag")]
+    debugPrint(f"tags: {str(tag_list)}")
+
+    json_info = {"title": title, "performers": [{"name": performer}], "studio": {"name": "I Feel Myself"}, "tags": tag_list, "date":date, "image": cover_url,"details": description, "url": scene_url}
+    return json_info
+
+def debugPrint(t):
+    """Write ``t`` followed by a newline to standard error.
+
+    Non-string values are converted with ``str()`` instead of raising a
+    ``TypeError`` on concatenation.
+    """
+    print(t, file=sys.stderr)
+
+# Class names used to locate scene entries on a page.
+_SCENE_ENTRY_CLASSES = ["blog_wide_news_tbl entry ppss-scene","entry ppss-scene"]
+
+# URL-encoded value of the ifm_prefs cookie. Decoded, it is a serialized
+# "search" settings array (view by news, any date, all countries, no tags).
+_SCENE_SEARCH_PREFS = "a%3A1%3A%7Bs%3A6%3A%22search%22%3Ba%3A17%3A%7Bs%3A8%3A%22category%22%3Ba%3A0%3A%7B%7Ds%3A7%3A%22view_by%22%3Bs%3A4%3A%22news%22%3Bs%3A7%3A%22date_by%22%3Bs%3A7%3A%22anytime%22%3Bs%3A10%3A%22from_month%22%3Bs%3A1%3A%221%22%3Bs%3A9%3A%22from_year%22%3Bs%3A4%3A%222006%22%3Bs%3A8%3A%22to_month%22%3Bs%3A2%3A%2212%22%3Bs%3A7%3A%22to_year%22%3Bs%3A4%3A%223000%22%3Bs%3A7%3A%22country%22%3Bs%3A3%3A%22all%22%3Bs%3A10%3A%22attributes%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_logical%22%3Bs%3A3%3A%22AND%22%3Bs%3A13%3A%22tags_remember%22%3Bs%3A1%3A%22n%22%3Bs%3A4%3A%22tags%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_exclude%22%3Bs%3A0%3A%22%22%3Bs%3A9%3A%22hide_tags%22%3Ba%3A0%3A%7B%7Ds%3A8%3A%22age_from%22%3Bs%3A2%3A%2218%22%3Bs%3A6%3A%22age_to%22%3Bs%3A2%3A%2299%22%3Bs%3A16%3A%22profilevid_limit%22%3Bs%3A0%3A%22%22%3B%7D%7D"
+
+
+def _open_scene_search(browser, keyword):
+    """Set the search cookies for ``keyword`` and return the parsed results page."""
+    for name, value, domain in (
+        ('ifm_search_keyword', keyword, 'ifeelmyself.com'),
+        ('ifm_prefs', _SCENE_SEARCH_PREFS, '.ifeelmyself.com'),
+        ('ifeel_auth', ifeelauth, '.ifeelmyself.com'),
+    ):
+        browser.session.cookies.set_cookie(create_cookie(name=name, value=value, domain=domain))
+    browser.open("https://ifeelmyself.com/public/main.php?page=search_results")
+    return browser.page
+
+
+def _scene_entry_cover(table):
+    """Return the entry's cover URL: video poster, else img src, else an empty string."""
+    video = table.find('video')  # New scenes use the video tag
+    if video is not None:
+        return str(video['poster'])
+    image = table.find('img')  # Old scenes still use an img tag
+    if image is not None:
+        return str(image['src'])
+    return ""
+
+
+def _find_scene_entry(tables, artist_id, video_id):
+    """Return the first entry whose cover file name matches the ids, or None."""
+    wanted = (f"/{artist_id}-{video_id}vg.jpg", f"/{artist_id}-{video_id}hs.jpg")
+    for table in tables:
+        img = _scene_entry_cover(table)
+        debugPrint(f"Image:{img}")
+        if any(name in img for name in wanted):
+            return table
+    return None
+
+
+def scrapeScene(filename,date,url):
+    """Scrape one scene, either from its URL or by searching for its file name.
+
+    ``url`` may be a scene page URL or a cover image URL ending in ``.jpg``
+    (the scene page URL is then rebuilt from the ids in the image path).
+    Without a URL, ``filename`` is searched: first by the artist/video ids it
+    contains, otherwise by a title extracted from it. ``date`` is accepted
+    for interface compatibility and is not read.
+
+    Returns the dict produced by ``extract_SceneInfo`` or an empty list when
+    nothing was found. When the file name is unsupported or the title search
+    has no results, ``{}`` is printed and the process exits.
+    """
+    ret = []
+    browser = StatefulBrowser(session=None)
+    browser.open("https://ifeelmyself.com/public/main.php")
+    cookie_obj = create_cookie(name='tags_popup_shown', value='true', domain='ifeelmyself.com')
+    browser.session.cookies.set_cookie(cookie_obj)
+
+    if url:
+        debugPrint("Url found, using that to scrape")
+        cover_url = None
+        if url.endswith(".jpg"):
+            # Use the image url to extract the metadata
+            media_match = re.search(r"\/(\d{3,5})\/",url,re.I)
+            artist_match = re.search(r"\/(f\d{4,5})",url,re.I)
+            if not (media_match and artist_match):
+                debugPrint("Could not read media/artist id from image url")
+                return ret
+            media_id = media_match.group(1)
+            artist_id = artist_match.group(1)
+            debugPrint(f"Artist id found: {artist_id}")
+            debugPrint(f"Media id found: {media_id}")
+            cover_url = url
+            url = "https://ifeelmyself.com/public/main.php?page=flash_player&out=bkg&media_id="+str(media_id)+"&artist_id="+str(artist_id)
+        browser.open(url)
+        response = browser.page
+        table = response.find(class_=_SCENE_ENTRY_CLASSES) if response is not None else None
+        if table is not None:
+            ret = extract_SceneInfo(table,cover_url)
+        return ret
+
+    debugPrint("Analyzing filename...")
+    filename = filename or ""
+    artist_id_match = re.search(r"(f\d{3,5})",filename,re.I)
+    video_id_match = re.search(r"-(\d+)",filename,re.I)
+    if artist_id_match and video_id_match:
+        artist_id = artist_id_match.group(0)
+        video_id = video_id_match.group(1)
+        response = _open_scene_search(browser, artist_id)
+        debugPrint("Searching for video_id")
+        debugPrint(artist_id+"-"+video_id)
+        table = _find_scene_entry(response.find_all(class_=_SCENE_ENTRY_CLASSES), artist_id, video_id)
+        if table is not None:
+            debugPrint("Found a single match video!")
+            return extract_SceneInfo(table)
+
+        debugPrint("0 matches found! Checking offset")
+        # The text of the last paging link is read as the number of result pages.
+        page_links = response.find_all("a", class_="pagging_nonsel")
+        try:
+            pages = int(page_links[-1].get_text()) if page_links else 0
+        except ValueError:
+            pages = 0
+        debugPrint("Pages:  "+str(pages))
+        for offset in range(0,pages*10,10):
+            browser.open("https://ifeelmyself.com/public/main.php?page=search_results&offset="+str(offset))
+            table = _find_scene_entry(browser.page.find_all(class_=_SCENE_ENTRY_CLASSES), artist_id, video_id)
+            if table is not None:
+                debugPrint("FOUND")
+                # Return right away so the remaining pages are not fetched.
+                return extract_SceneInfo(table)
+        debugPrint("0 matches found!, check your filename")
+        return ret
+
+    debugPrint("Name changed after downloading")
+    filename = filename.lower()
+    # Python spells named groups (?P<name>...); (?<name>...) is a regex syntax error.
+    extract_from_filename = re.match(r"^([0-9\.]{6,10})?(?P<title>.+)\s(?P<artist>\w+)(\.mp4)?$",filename)
+    if not extract_from_filename:
+        debugPrint("Not a supported filename")
+        print("{}")
+        sys.exit()
+
+    title = extract_from_filename.group('title')
+    title = title.replace("ifeelmyself","")
+    title = title.replace("-","")
+    # Drop only the standalone word "by", not the letters inside other words.
+    title = re.sub(r"\bby\b", "", title)
+    debugPrint(f"Title: {title}")
+
+    response = _open_scene_search(browser, title)
+    # Obtaining and counting the results. Ideally you only have a single result
+    matches = response.find_all("a", href='javascript:;')  # These links contain all the titles
+    if not matches:
+        debugPrint("0 matches found! Check filename")
+        print("{}")
+        sys.exit()
+    if len(matches) == 1:
+        debugPrint("Found a single match!")
+    else:
+        debugPrint("Multiple videos found, maybe refine search term?")
+    # With several results the first entry is used.
+    table = response.find(class_=_SCENE_ENTRY_CLASSES)
+    if table is not None:
+        ret = extract_SceneInfo(table)
+    return ret
+
+
+
+
+
+def extract_PerformerInfo(table,browser,cover_url=None):
+    """Build the performer dictionary for a single scene entry.
+
+    ``table`` is the BeautifulSoup element of one scene entry. ``browser`` is
+    accepted for interface compatibility and is not used. ``cover_url`` is an
+    optional, already known cover URL; when omitted the entry's ``<img src>``
+    is used, falling back to its ``<video poster>``.
+
+    Returns a dict with the keys ``name``, ``gender``, ``url``, ``image`` and
+    ``remote_site_id``.
+
+    Raises ``ValueError`` when no cover can be found or when the cover URL
+    does not contain an artist id.
+    """
+    performer = str(table.find(class_=["entryHeadingFlash","entryHeading"]).find_all("a")[1].get_text().replace("_"," "))
+    debugPrint(f"Extracting info for performer: {performer}")
+
+    if cover_url is None:
+        image = table.find("img")
+        video = table.find("video")
+        if image is not None:
+            cover_url = str(image['src'])
+        elif video is not None:
+            # Entries without an img tag carry the cover as the video poster.
+            cover_url = str(video['poster'])
+        else:
+            raise ValueError(f"No cover image found for performer: {performer}")
+    debugPrint(cover_url)
+
+    id_match = re.search(r"\/((f|m)\d{4,5})",cover_url,re.I)
+    if id_match is None:
+        raise ValueError(f"No artist id found in cover url: {cover_url}")
+    artist_id = id_match.group(1)
+    artist_img = "https://bcdn.ifeelmyself.com/artists/" + artist_id + ".jpg"
+
+    # The match is case-insensitive, so compare the prefix case-insensitively too.
+    gender = "female" if artist_id.lower().startswith("f") else "male"
+
+    json_info = {"name": performer, "gender": gender, "url": "https://ifeelmyself.com/public/main.php?page=artist_bio&artist_id="+artist_id, "image": artist_img, "remote_site_id": artist_id}
+    return json_info
+
+
+def queryPerformer(perfname):
+    """Search the site for ``perfname`` and return the performers found.
+
+    The lower-cased name is searched first. If that yields nothing, the
+    search is repeated with spaces replaced by underscores. Every scene entry
+    on the results page is passed to ``extract_PerformerInfo`` and one result
+    per distinct performer name is kept.
+
+    Returns a list of performer dicts; the list is empty when nothing
+    matched, so the caller emits a single JSON document.
+    """
+    search_url = "https://ifeelmyself.com/public/main.php?page=search_results"
+    browser = StatefulBrowser(session=None)
+    perfname = perfname.lower()
+    browser.open("https://ifeelmyself.com/public/main.php")
+    for name, value, domain in (
+        ('tags_popup_shown', 'true', 'ifeelmyself.com'),
+        ('ifm_prefs', "a%3A1%3A%7Bs%3A6%3A%22search%22%3Ba%3A17%3A%7Bs%3A8%3A%22category%22%3Ba%3A0%3A%7B%7Ds%3A7%3A%22view_by%22%3Bs%3A4%3A%22news%22%3Bs%3A7%3A%22date_by%22%3Bs%3A7%3A%22anytime%22%3Bs%3A10%3A%22from_month%22%3Bs%3A1%3A%221%22%3Bs%3A9%3A%22from_year%22%3Bs%3A4%3A%222006%22%3Bs%3A8%3A%22to_month%22%3Bs%3A2%3A%2212%22%3Bs%3A7%3A%22to_year%22%3Bs%3A4%3A%223000%22%3Bs%3A7%3A%22country%22%3Bs%3A3%3A%22all%22%3Bs%3A10%3A%22attributes%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_logical%22%3Bs%3A3%3A%22AND%22%3Bs%3A13%3A%22tags_remember%22%3Bs%3A1%3A%22n%22%3Bs%3A4%3A%22tags%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_exclude%22%3Bs%3A0%3A%22%22%3Bs%3A9%3A%22hide_tags%22%3Ba%3A0%3A%7B%7Ds%3A8%3A%22age_from%22%3Bs%3A2%3A%2218%22%3Bs%3A6%3A%22age_to%22%3Bs%3A2%3A%2299%22%3Bs%3A16%3A%22profilevid_limit%22%3Bs%3A0%3A%22%22%3B%7D%7D", '.ifeelmyself.com'),
+        ('ifm_search_keyword', perfname, 'ifeelmyself.com'),
+        ('ifeel_auth', ifeelauth, '.ifeelmyself.com'),
+    ):
+        browser.session.cookies.set_cookie(create_cookie(name=name, value=value, domain=domain))
+
+    debugPrint("Analyzing perfname...")
+    browser.open(search_url)
+    response = browser.page
+    # Obtaining and counting the results. Ideally you only have a single result
+    matches = response.find_all("a", href='javascript:;')  # These links contain all the titles
+    debugPrint("Found: "+str(len(matches)))
+
+    ret = []
+    if not matches:
+        # Performer names often use an underscore instead of a space, so
+        # replace spaces and try again.
+        perfname = perfname.replace(" ","_")
+        cookie_obj = create_cookie(name='ifm_search_keyword', value=perfname, domain='ifeelmyself.com')
+        browser.session.cookies.set_cookie(cookie_obj)
+        browser.open(search_url)
+        response = browser.page
+        matches = response.find_all("a", href='javascript:;')
+        if not matches:
+            debugPrint("0 matches found! Check performer name")
+            return ret
+        debugPrint("Multiple videos found, scraping multiple performers")
+
+    # Several scenes can belong to the same performer; keep the first of each name.
+    foundList = set()
+    for table in response.find_all(class_=["blog_wide_news_tbl entry ppss-scene","entry ppss-scene"]):
+        result = extract_PerformerInfo(table,browser)
+        if result['name'] not in foundList:
+            foundList.add(result['name'])
+            ret.append(result)
+    return ret
+
+
+
+
+
+def scrapePerformer(artist_id):
+    """Fetch the artist bio page for ``artist_id`` and extract country and details.
+
+    The first cell of the first ``bioTable`` element is split into lines. The
+    country is the text after the first ``<br/>`` on the second line, and the
+    details are lines four to nine with ``<strong>`` and ``<br/>`` markup
+    removed.
+
+    Returns a dict with the keys ``country`` and ``details``. A part that
+    cannot be found on the page is returned as an empty string instead of
+    raising ``IndexError``.
+    """
+    browser = StatefulBrowser(session=None)
+    for name, value, domain in (
+        ('tags_popup_shown', 'true', 'ifeelmyself.com'),
+        ('ifeel_auth', ifeelauth, '.ifeelmyself.com'),
+    ):
+        browser.session.cookies.set_cookie(create_cookie(name=name, value=value, domain=domain))
+    browser.open("https://ifeelmyself.com/public/main.php?page=artist_bio&artist_id="+artist_id)
+    response = browser.page
+
+    tables = response.find_all(class_=["bioTable"])
+    if not tables:
+        debugPrint("No bio table found for artist: "+str(artist_id))
+        return {"country": "", "details": ""}
+    table = tables[0]
+    debugPrint(str(table))
+
+    bio = str(table.find("td"))
+    lines = bio.splitlines(True)
+
+    country = ""
+    plain_lines = bio.splitlines()
+    if len(plain_lines) > 1:
+        country_parts = plain_lines[1].split("<br/>")
+        if len(country_parts) > 1:
+            country = country_parts[1]
+
+    # Slicing keeps whatever is present when the bio has fewer than nine lines.
+    details = "".join(lines[3:9])
+    details = details.replace("<strong>","").replace("</strong>","").replace("<br/>","")
+
+    json_info = {"country": country , "details": details}
+    return json_info
+
+
+
+
+# Read the JSON fragment from stdin and echo it to stderr for debugging.
+i = readJSONInput()
+# The trailing newline keeps the echo from fusing with the next debug line.
+sys.stderr.write(json.dumps(i) + "\n")
+
+# Command-line arguments select the action; missing ones are treated as
+# empty so an incomplete command line does not raise IndexError.
+mode = sys.argv[1] if len(sys.argv) > 1 else ""
+target = sys.argv[2] if len(sys.argv) > 2 else ""
+
+if mode == "query" and target == "scene":
+    # Fragment fields may be absent; scrapeScene never reads the date.
+    ret = scrapeScene(i.get('title'),i.get('date'),i.get('url'))
+    print(json.dumps(ret))
+elif mode == "query" and target == "performer":
+    ret = queryPerformer(i['name'])
+    print(json.dumps(ret))
+elif mode == "url":
+    ret = scrapeScene(filename=None,date=None,url=i['url'])
+    print(json.dumps(ret))
+elif mode == "scrape":
+    country = ""
+    details = ""
+    # Country and details are only fetched when an auth cookie is configured.
+    if ifeelauth:
+        ret = scrapePerformer(i['remote_site_id'])
+        country = ret['country']
+        details = ret['details']
+
+    json_info = {"name": i.get('name'), "gender": i.get('gender'), "url": i.get('url'),"country": country ,"details": details , "image": "https://bcdn.ifeelmyself.com/artists/" + i['remote_site_id'] + ".jpg"}
+    print(json.dumps(json_info))
+else:
+    sys.stderr.write("Unknown arguments: " + " ".join(sys.argv[1:]) + "\n")
--- a/stash/config/scrapers/community/IFeelMyself/IFeelMyself.yml
+++ b/stash/config/scrapers/community/IFeelMyself/IFeelMyself.yml
@@ -0,0 +1,30 @@
+name: IFeelMyself
+performerByName:
+  action: script
+  script:
+    - python3
+    - IFeelMyself.py
+    - query
+    - performer
+performerByFragment:
+  action: script
+  script:
+    - python3
+    - IFeelMyself.py
+    - scrape
+sceneByFragment:
+  action: script
+  script:
+    - python3
+    - IFeelMyself.py
+    - query
+    - scene
+sceneByURL:
+  - url:
+      - ifeelmyself.com
+    action: script
+    script:
+      - python3
+      - IFeelMyself.py
+      - url
+# Last Updated February 04, 2023
--- a/stash/config/scrapers/community/IFeelMyself/manifest
+++ b/stash/config/scrapers/community/IFeelMyself/manifest
@@ -0,0 +1,10 @@
+id: IFeelMyself
+name: IFeelMyself
+metadata: {}
+version: a0ee6d1
+date: "2024-04-01 22:13:36"
+requires: []
+source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
+files:
+- IFeelMyself.yml
+- IFeelMyself.py