changelog:

ability to add comments in the username list with "#" (see the sample list below)
automatic removal of "_" in usernames
sped up filter checking
added basic indexing -> speeds up the check for already-downloaded files when fetching newer ones
other small changes
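
For example, a usernames file under the new rules might look like this (names invented; everything after "#" is ignored, and spaces/underscores are stripped before downloading starts):

    # main accounts
    some_artist        # becomes "someartist"
    another-artist
    # paused for now: third_artist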
Commit 675f558d03 by Kentai Radiquum, 2022-07-10 03:24:39 +05:00
Parent: 377df392e5
GPG key ID: CB1FC16C710DB347 (no known key found for this signature in database)
5 changed files with 260 additions and 122 deletions

Modules/config.py

@@ -72,7 +72,10 @@ parser.add_argument(
     Folder-Name-Here",
 )
 parser.add_argument(
-    "-s", "--start", default=1, help="page number to start from",
+    "-s",
+    "--start",
+    default=1,
+    help="page number to start from",
 )
 parser.add_argument(
     "-S",
@@ -115,7 +118,7 @@ parser.add_argument(
 )
 parser.add_argument(
     "--download",
-    help="download a specific submission /view/12345678/",
+    help="download a specific submission by providing its id",
 )
 parser.add_argument(
     "-jd",
@@ -129,6 +132,11 @@ parser.add_argument(
     action="store_true",
     help="extract furaffinity cookies directly from your browser",
 )
+parser.add_argument(
+    "--index",
+    action="store_true",
+    help="create an index of downloaded files in an output folder",
+)
 
 args = parser.parse_args()
@@ -136,7 +144,7 @@ args = parser.parse_args()
 username = args.username
 category = args.category
 
-if username != None:
+if username is not None:
     username = username.split(" ")
 
 # Custom input
@@ -153,6 +161,7 @@ folder = args.folder
 login = args.login
 check = args.check
+index = args.index
 submissions = args.submissions
 json_description = args.json_description
 metadata = args.metadata
@@ -168,3 +177,26 @@ END = "\033[0m"
 
 # Globals
 BASE_URL = "https://www.furaffinity.net"
+username_replace_chars = {
+    " ": "",
+    "_": "",
+}
+search = 'YCH[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
+|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
+|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE\
+|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
+|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*ABLE\
+|AVAIL[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
+|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLONE\
+|CLONE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
+|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*LIM\
+|LIM[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
+|COM[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
+|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*COM\
+|COM[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE[^r]\
+|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*COM\
+|FIX[a-z $-/:-?{-~!"^_`\\[\\]]*ICE\
+|TELEGRAM[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
+|TG[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
+|REM[insder]*\\b\
+|\\bREF|\\bSale|auction|multislot|stream|adopt'
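
The bracket class [a-z $-/:-?{-~!"^_`\[\]] used throughout this pattern covers lowercase letters, spaces, and most ASCII punctuation (but not digits), so each alternative tolerates filler text between its keywords. Moving the pattern here means it is built once at import time instead of on every check_filter() call, which is the "sped up filter checking" item above. A rough demonstration, with the pattern abridged and the titles invented:

    import re

    search = 'YCH[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN|\\bREF|adopt'  # abridged stand-in

    for title in ("YCH - still OPEN!", "Character REF sheet", "Quiet forest painting"):
        print(title, "->", bool(re.search(search, title, re.IGNORECASE)))
    # -> True, True, False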

Modules/download.py

@@ -1,12 +1,16 @@
+import http.cookiejar as cookielib
 import json
-from tqdm import tqdm
-from pathvalidate import sanitize_filename
-import Modules.config as config
 import os
+
 import requests
 from bs4 import BeautifulSoup
-import http.cookiejar as cookielib
-from Modules.functions import system_message_handler, check_filter, download_complete
+from pathvalidate import sanitize_filename
+from tqdm import tqdm
+
+import Modules.config as config
+from Modules.functions import download_complete
+from Modules.functions import requests_retry_session
+from Modules.functions import system_message_handler
 
 session = requests.session()
 if config.cookies is not None:  # add cookies if present
@@ -14,8 +18,10 @@ if config.cookies is not None:  # add cookies if present
     cookies.load()
     session.cookies = cookies
 
+
 def download(path):
-    response = session.get(f"{config.BASE_URL}{path}")
+    response = requests_retry_session(session=session).get(f"{config.BASE_URL}{path}")
     s = BeautifulSoup(response.text, "html.parser")
 
     # System messages
@@ -23,78 +29,71 @@ def download(path):
         system_message_handler(s)
     image = s.find(class_="download").find("a").attrs.get("href")
-    title = s.find(class_="submission-title").find("p").contents[0]
-    title = sanitize_filename(title)
-    dsc = s.find(class_="submission-description").text.strip().replace("\r\n", "\n")
-    if config.json_description is True:
-        dsc = []
-    filename = image.split("/")[-1:][0]
-    data = {
-        "id": int(path.split("/")[-2:-1][0]),
-        "filename": filename,
-        "author": s.find(class_="submission-id-sub-container")
-        .find("a")
-        .find("strong")
-        .text,
-        "date": s.find(class_="popup_date").attrs.get("title"),
-        "title": title,
-        "description": dsc,
-        "url": f"{config.BASE_URL}{path}",
-        "tags": [],
-        "category": s.find(class_="info").find(class_="category-name").text,
-        "type": s.find(class_="info").find(class_="type-name").text,
-        "species": s.find(class_="info").findAll("div")[2].find("span").text,
-        "gender": s.find(class_="info").findAll("div")[3].find("span").text,
-        "views": int(s.find(class_="views").find(class_="font-large").text),
-        "favorites": int(s.find(class_="favorites").find(class_="font-large").text),
-        "rating": s.find(class_="rating-box").text.strip(),
-        "comments": [],
-    }
-    if config.submission_filter is True and check_filter(title) is True:
-        print(
-            f'{config.WARN_COLOR}"{title}" was filtered and will not be \
-downloaded - {data.get("url")}{config.END}'
-        )
-        return True
+    filename = sanitize_filename(image.split("/")[-1:][0])
+
+    author = s.find(class_="submission-id-sub-container").find("a").find("strong").text
+    title = sanitize_filename(s.find(class_="submission-title").find("p").contents[0])
+    view_id = int(path.split("/")[-2:-1][0])
+
+    output = f"{config.output_folder}/{author}"
+    rating = s.find(class_="rating-box").text.strip()
+
+    if config.category != "gallery":
+        output = f"{config.output_folder}/{author}/{config.category}"
+    if config.folder is not None:
+        output = f"{config.output_folder}/{author}/{config.folder}"
+    os.makedirs(output, exist_ok=True)
+
+    output_path = f"{output}/{title} ({view_id}) - {filename}"
+    output_path_fb = f"{output}/{title} - {filename}"
+    if config.rating is True:
+        os.makedirs(f"{output}/{rating}", exist_ok=True)
+        output_path = f"{output}/{rating}/{title} ({view_id}) - {filename}"
+        output_path_fb = f"{output}/{rating}/{title} - {filename}"
+
+    if config.dont_redownload is True and os.path.isfile(output_path_fb):
+        return file_exists_fallback(author, title)
 
     image_url = f"https:{image}"
-    output = f"{config.output_folder}/{data.get('author')}"
-    if config.category != "gallery":
-        output = f"{config.output_folder}/{data.get('author')}/{config.category}"
-    if config.folder is not None:
-        output = f"{config.output_folder}/{data.get('author')}/{config.folder}"
-    os.makedirs(output, exist_ok=True)
-    filename = sanitize_filename(filename)
-    output_path = f"{output}/{title} - {filename}"
-    if config.rating is True:
-        os.makedirs(f'{output}/{data.get("rating")}', exist_ok=True)
-        output_path = f'{output}/{data.get("rating")}/{title} - {filename}'
-    if config.dont_redownload is True and os.path.isfile(output_path):
-        if config.check is True:
-            print(
-                f"{config.SUCCESS_COLOR}Downloaded all recent files of \"{data.get('author')}\"{config.END}"
-            )
-            raise download_complete
-        print(
-            f'{config.WARN_COLOR}Skipping "{title}" since it\'s already downloaded{config.END}'
-        )
-        return True
-    else:
-        download_file(
-            image_url,
-            output_path,
-            f'{title} - \
-[{data.get("rating")}]',
-        )
+    download_file(
+        image_url,
+        output_path,
+        f"{title} - \
+[{rating}]",
+    )
+
     if config.metadata is True:
+        dsc = s.find(class_="submission-description").text.strip().replace("\r\n", "\n")
+        if config.json_description is True:
+            dsc = []
+        data = {
+            "id": view_id,
+            "filename": filename,
+            "author": author,
+            "date": s.find(class_="popup_date").attrs.get("title"),
+            "title": title,
+            "description": dsc,
+            "url": f"{config.BASE_URL}{path}",
+            "tags": [],
+            "category": s.find(class_="info").find(class_="category-name").text,
+            "type": s.find(class_="info").find(class_="type-name").text,
+            "species": s.find(class_="info").findAll("div")[2].find("span").text,
+            "gender": s.find(class_="info").findAll("div")[3].find("span").text,
+            "views": int(s.find(class_="views").find(class_="font-large").text),
+            "favorites": int(s.find(class_="favorites").find(class_="font-large").text),
+            "rating": rating,
+            "comments": [],
+        }
         create_metadata(output, data, s, title, filename)
+
     if config.download is not None:
-        print(f'{config.SUCCESS_COLOR}File saved as "{output_path}" {config.END}')
+        print(
+            f'{config.SUCCESS_COLOR}File saved as \
+"{output_path}" {config.END}'
+        )
     return True
 
 
 def download_file(url, fname, desc):
     try:
         r = session.get(url, stream=True)
@@ -121,7 +120,8 @@ def download_file(url, fname, desc):
             os.remove(fname)
             exit()
     return True
 
+
 def create_metadata(output, data, s, title, filename):
     if config.rating is True:
         os.makedirs(f'{output}/{data.get("rating")}/metadata', exist_ok=True)
@@ -163,4 +163,18 @@ def create_metadata(output, data, s, title, filename):
     # Write a UTF-8 encoded JSON file for metadata
     with open(f"{metadata}.json", "w", encoding="utf-8") as f:
         json.dump(data, f, ensure_ascii=False, indent=4)
+
+
+def file_exists_fallback(author, title):
+    if config.check is True:
+        print(
+            f'fallback: {config.SUCCESS_COLOR}Downloaded all recent files of \
+"{author}"{config.END}'
+        )
+        raise download_complete
+    print(
+        f'fallback: {config.WARN_COLOR}Skipping "{title}" since \
+it\'s already downloaded{config.END}'
+    )
+    return True
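
The new naming scheme embeds the submission id in the file name, while the old-style path is kept as a fallback so files downloaded before this commit are still recognized. A sketch of the two paths download() now builds (all values invented):

    output = "Output/artist"
    title, view_id, filename = "My Art", 12345678, "12345678.artist_my-art.png"

    output_path = f"{output}/{title} ({view_id}) - {filename}"  # new scheme, used for saving
    output_path_fb = f"{output}/{title} - {filename}"           # old scheme, checked by dont_redownload
    print(output_path)     # Output/artist/My Art (12345678) - 12345678.artist_my-art.png
    print(output_path_fb)  # Output/artist/My Art - 12345678.artist_my-art.png

Note that the on-disk check here only looks at the fallback path; files saved under the new scheme are instead caught by the index lookup in the main loop (see Modules/index.py below).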

Modules/functions.py

@@ -4,6 +4,8 @@ import re
 import browser_cookie3
 import requests
 from bs4 import BeautifulSoup
+from requests.adapters import HTTPAdapter
+from urllib3.util import Retry
 
 import Modules.config as config
@@ -13,39 +15,43 @@ if config.cookies is not None:  # add cookies if present
     cookies.load()
     session.cookies = cookies
+session.headers.update({"User-Agent": config.user_agent})
+
+
+def requests_retry_session(
+    retries=3,
+    backoff_factor=0.3,
+    status_forcelist=(500, 502, 504, 104),
+    session=None,
+):
+    session = session or requests.Session()
+    retry = Retry(
+        total=retries,
+        read=retries,
+        connect=retries,
+        backoff_factor=backoff_factor,
+        status_forcelist=status_forcelist,
+    )
+    adapter = HTTPAdapter(max_retries=retry)
+    session.mount("http://", adapter)
+    session.mount("https://", adapter)
+    return session
 
 
 class download_complete(Exception):
     pass
 
 
 def check_filter(title):
-    search = 'YCH[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
-|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
-|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE\
-|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
-|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*ABLE\
-|AVAIL[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
-|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLONE\
-|CLONE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
-|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*LIM\
-|LIM[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
-|COM[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
-|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*COM\
-|COM[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE[^r]\
-|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*COM\
-|FIX[a-z $-/:-?{-~!"^_`\\[\\]]*ICE\
-|TELEGRAM[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
-|TG[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
-|REM[insder]*\\b\
-|\\bREF|\\bSale|auction|multislot|stream|adopt'
     match = re.search(
-        search,
+        config.search,
         title,
         re.IGNORECASE,
     )
     if match is not None and title == match.string:
         return True
     return None
@@ -68,9 +74,7 @@ def system_message_handler(s):
     raise download_complete
 
 
-def login(user_agent):
-    session.headers.update({"User-Agent": user_agent})
+def login():
     CJ = browser_cookie3.load()
@@ -103,8 +107,6 @@ by using "-c cookies.txt"{config.END}'
 furaffinity in your browser, or you can export cookies.txt manually{config.END}"
     )
-    exit()
 
 
 def next_button(page_url):
     response = session.get(page_url)
@@ -130,15 +132,17 @@ def next_button(page_url):
         raise download_complete
         page_num = next_button.parent.attrs["action"].split("/")[-2]
     else:
+        next_button = s.find("a", class_="button standard right", text="Next")
         page_num = fav_next_button(s)
-    print(f"Downloading page {page_num} - {page_url}")
+    print(
+        f"Downloading page {page_num} - {config.BASE_URL}/{next_button.parent.attrs['action']}"
+    )
     return page_num
 
 
-def fav_next_button(s):
+def fav_next_button():
     # unlike galleries that are sequentially numbered, favorites use a different scheme.
     # the "page_num" is instead: [set of numbers]/next (the trailing /next is required)
-    next_button = s.find("a", class_="button standard right", text="Next")
     if next_button is None:
         print(f"{config.WARN_COLOR}Unable to find next button{config.END}")
         raise download_complete
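
requests_retry_session() mounts an HTTPAdapter with a urllib3 Retry policy onto an existing session, so requests that fail with one of the listed status codes are retried with exponential backoff (note that status_forcelist is matched against HTTP status codes, so the 104 entry will rarely, if ever, fire). A minimal usage sketch, mirroring the calls in Modules/download.py and the main script:

    import requests

    from Modules.functions import requests_retry_session

    session = requests.session()
    response = requests_retry_session(session=session).get("https://www.furaffinity.net")
    print(response.status_code)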

Modules/index.py (new file)

@@ -0,0 +1,37 @@
+import contextlib
+import re
+from pathlib import Path
+
+import Modules.config as config
+
+
+def start_indexing(path, layer=0):
+    """Recursively iterate over each item in path
+    and write every file's path into the index.
+    """
+    # make Path object from input string
+    path = Path(path)
+    with open(f"{config.output_folder}/index.idx", encoding="utf-8", mode="a+") as idx:
+        # iter the directory
+        for p in path.iterdir():
+            if p.is_file():
+                idx.write(f"{p}\n")
+            elif p.is_dir():
+                start_indexing(p, layer + 1)
+            else:
+                raise FileNotFoundError()
+
+
+def check_file(path):
+    view_id = path.split("/")[-2:-1][0]
+    with contextlib.suppress(FileNotFoundError):
+        with open(f"{config.output_folder}/index.idx", encoding="utf-8") as idx:
+            index = idx.read()
+            match = re.search(view_id, index)
+            if match is not None:
+                return True
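
The index is a flat text file listing every path under the output folder, and check_file() simply searches it for a submission id. An illustrative sketch (paths invented, assuming config.output_folder is "Output"):

    from Modules.index import check_file, start_indexing

    start_indexing("Output")              # appends every file path under Output/ to Output/index.idx
    print(check_file("/view/12345678/"))  # True if "12345678" occurs anywhere in the index

Because the lookup is a plain substring search, it relies on the new "{title} ({view_id}) - {filename}" naming to put the id into each path; a very short id could in principle match inside an unrelated filename.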

Main script

@@ -9,10 +9,14 @@ from bs4 import BeautifulSoup
 
 import Modules.config as config
 from Modules.download import download
+from Modules.functions import check_filter
 from Modules.functions import download_complete
 from Modules.functions import login
 from Modules.functions import next_button
+from Modules.functions import requests_retry_session
 from Modules.functions import system_message_handler
+from Modules.index import check_file
+from Modules.index import start_indexing
 
 # get session
 session = requests.session()
@@ -31,12 +35,13 @@ def main():
     while True:
         if config.stop == page_num:
             print(
-                f'{config.WARN_COLOR}Reached page "{config.stop}", stopping.{config.END}'
+                f'{config.WARN_COLOR}Reached page "{config.stop}", \
+stopping.{config.END}'
             )
             break
 
         page_url = f"{download_url}/{page_num}"
-        response = session.get(page_url)
+        response = requests_retry_session(session=session).get(page_url)
         s = BeautifulSoup(response.text, "html.parser")
 
         # System messages
@@ -50,7 +55,30 @@ def main():
 
         # Download all images on the page
         for img in s.findAll("figure"):
-            download(img.find("a").attrs.get("href"))
+            title = img.find("figcaption").contents[0].text
+            img_url = img.find("a").attrs.get("href")
+            if config.submission_filter is True and check_filter(title) is True:
+                print(
+                    f'{config.WARN_COLOR}"{title}" was filtered and will not be \
+downloaded - {config.BASE_URL}{img_url}{config.END}'
+                )
+                continue
+
+            if config.dont_redownload is True and check_file(img_url) is True:
+                if config.check is True:
+                    print(
+                        f'{config.SUCCESS_COLOR}Downloaded all recent files of \
+"{config.username[0]}"{config.END}'
+                    )
+                    raise download_complete
+                print(
+                    f'{config.WARN_COLOR}Skipping "{title}" since \
+it\'s already downloaded{config.END}'
+                )
+                continue
+
+            download(img_url)
             sleep(config.interval)
 
         page_num = next_button(page_url)
@@ -58,13 +86,18 @@
 
 if __name__ == "__main__":
     if config.login is True:
-        login(config.user_agent)
+        login()
+        exit()
+
+    if config.index is True:
+        if os.path.isfile(f"{config.output_folder}/index.idx"):
+            os.remove(f"{config.output_folder}/index.idx")
+        start_indexing(config.output_folder)
+        print(f"{config.SUCCESS_COLOR}indexing finished{config.END}")
         exit()
 
     try:
-        response = session.get(config.BASE_URL)
-    except ConnectionError:
-        print(f"{config.ERROR_COLOR}Connection failed{config.END}")
-        exit()
+        response = requests_retry_session(session=session).get(config.BASE_URL)
     except KeyboardInterrupt:
         print(f"{config.WARN_COLOR}Aborted by user{config.END}")
         exit()
@@ -72,14 +105,18 @@ if __name__ == "__main__":
 
     s = BeautifulSoup(response.text, "html.parser")
     if s.find(class_="loggedin_user_avatar") is not None:
         account_username = s.find(class_="loggedin_user_avatar").attrs.get("alt")
-        print(f'{config.SUCCESS_COLOR}Logged in as "{account_username}"{config.END}')
+        print(
+            f'{config.SUCCESS_COLOR}Logged in as \
+"{account_username}"{config.END}'
+        )
     else:
         print(
-            f"{config.WARN_COLOR}Not logged in, NSFW content is inaccessible{config.END}"
+            f"{config.WARN_COLOR}Not logged in, NSFW content \
+is inaccessible{config.END}"
        )
 
     if config.download is not None:
-        download(config.download)
+        download(f"/view/{config.download}/")
         exit()
 
     if config.submissions is True:
@@ -109,15 +146,29 @@ downloading "{config.folder[1]}"{config.END}'
         )
         exit()
 
-    if os.path.exists(config.username[0]):
-        data = open(config.username[0]).read()
-        config.username = filter(None, data.split("\n"))
+    try:
+        if os.path.exists(config.username[0]):
+            data = open(config.username[0]).read()
+            config.username = filter(None, data.split("\n"))
+    except TypeError or AttributeError:
+        print(
+            f"{config.ERROR_COLOR}Please enter a username \
+or provide a file with usernames (1 username per line){config.END}"
+        )
+        exit()
 
     for username in config.username:
-        print(f'{config.SUCCESS_COLOR}Now downloading "{username}"{config.END}')
-        download_url = f"{config.BASE_URL}/{config.category}/{username}"
-        main()
-        print(
-            f'{config.SUCCESS_COLOR}Finished \
-downloading "{username}"{config.END}'
-        )
+        username = username.split("#")[0].translate(
+            str.maketrans(config.username_replace_chars)
+        )
+        if username != "":
+            print(f'{config.SUCCESS_COLOR}Now downloading "{username}"{config.END}')
+            download_url = f"{config.BASE_URL}/{config.category}/{username}"
+            main()
+            print(
+                f'{config.SUCCESS_COLOR}Finished \
+downloading "{username}"{config.END}'
+            )
+
+    if os.path.isfile(f"{config.output_folder}/index.idx"):
+        os.remove(f"{config.output_folder}/index.idx")
+        start_indexing(config.output_folder)
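
Taken together with the config changes, the per-username preprocessing behaves like this (usernames invented; username_replace_chars is the {" ": "", "_": ""} mapping added in Modules/config.py):

    username_replace_chars = {" ": "", "_": ""}

    for raw in ('some_artist  # my favourite', '# a whole-line comment', 'plain-name'):
        username = raw.split("#")[0].translate(str.maketrans(username_replace_chars))
        print(repr(username))
    # -> 'someartist', '', 'plain-name' (empty results are skipped by the if username != "" guard)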