Changelog:

- Workaround for the NTFS filesystem when a username ends with a dot (see the sketch below).
- ReadMe changes.
- Other minor changes.
Kentai Radiquum 2022-07-22 00:45:06 +05:00
parent 007f00b8ba
commit d610cd350e
8 changed files with 141 additions and 162 deletions
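The NTFS workaround (visible in the download-module diff below) escapes dots in the author name before it is used as a folder name: Windows/NTFS silently drops trailing dots, so a directory for "someuser." would collide with "someuser". A minimal sketch of the idea, with an illustrative name:

author = "someuser."
safe_author = author.replace(".", "._")  # "someuser._" keeps its dot on NTFS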

View file

@@ -1,4 +1,5 @@
import argparse
import os
parser = argparse.ArgumentParser(
formatter_class=argparse.RawTextHelpFormatter,
@@ -28,7 +29,8 @@ parser.add_argument(
"username",
nargs="?",
help="username of the furaffinity \
user",
user (if username is starting with '-' or '--' \
provide them through a file instead)",
)
parser.add_argument(
"category",
@@ -36,14 +38,16 @@ parser.add_argument(
help="the category to download, gallery/scraps/favorites \
[default: gallery]",
default="gallery",
type=str,
)
parser.add_argument("-c", "--cookies", help="path to a NetScape cookies file")
parser.add_argument("--cookies", "-c", help="path to a NetScape cookies file", type=str)
parser.add_argument(
"--output",
"-o",
dest="output_folder",
default="Submissions",
help="set a custom output folder",
type=str,
)
parser.add_argument(
"--check",
@@ -51,56 +55,53 @@ parser.add_argument(
help="check and download latest submissions of a user",
)
parser.add_argument(
"-ua",
"--user-agent",
"-ua",
dest="user_agent",
default="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 \
Firefox/101.0",
help="Your browser's useragent, may be required, depending on your luck",
help="Your browser's user agent, may be required, depending on your luck",
type=str,
)
parser.add_argument(
"-sub",
"--submissions",
"-sub",
action="store_true",
help="download your \
submissions",
)
parser.add_argument(
"-f",
"--folder",
"-f",
help="full path of the furaffinity gallery folder. for instance 123456/\
Folder-Name-Here",
type=str,
)
parser.add_argument("--start", default=1, help="page number to start from", type=str)
parser.add_argument(
"-s",
"--start",
default=1,
help="page number to start from",
)
parser.add_argument(
"-S",
"--stop",
default=0,
help="Page number to stop on. Specify the full URL after the username: for \
favorites pages (1234567890/next) or for submissions pages: \
(new~123456789@48)",
type=str,
)
parser.add_argument(
"-rd",
"--redownload",
"-rd",
action="store_false",
help="Redownload files that have been downloaded already",
)
parser.add_argument(
"-i",
"--interval",
type=int,
"-i",
default=0,
help="delay between downloading pages in seconds [default: 0]",
type=int,
)
parser.add_argument(
"-r",
"--rating",
"-r",
action="store_false",
help="disable rating separation",
)
@@ -111,18 +112,17 @@ parser.add_argument(
help="enable submission filter",
)
parser.add_argument(
"-m",
"--metadata",
"-m",
action="store_true",
help="enable metadata saving",
)
parser.add_argument(
"--download",
help="download a specific submission by providing its id",
"--download", help="download a specific submission by providing its id", type=str
)
parser.add_argument(
"-jd",
"--json-description",
"-jd",
dest="json_description",
action="store_true",
help="download description as a JSON list",
@@ -147,6 +147,10 @@ category = args.category
if username is not None:
username = username.split(" ")
if os.path.exists(username[0]):
data = open(username[0]).read()
username = filter(None, data.split("\n"))
# Custom input
cookies = args.cookies
output_folder = args.output_folder
@@ -199,4 +203,4 @@ search = 'YCH[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
|TELEGRAM[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
|TG[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
|REM[insder]*\\b\
|\\bREF|\\bSale|auction|multislot|stream|adopt'
|\\bREF|\\bSale|auction|multislot|multi slot|stream|adopt'

View file

@@ -1,28 +1,18 @@
import http.cookiejar as cookielib
import json
import os
import requests
from bs4 import BeautifulSoup
from pathvalidate import sanitize_filename
from tqdm import tqdm
import Modules.config as config
from Modules.functions import download_complete
from Modules.functions import DownloadComplete
from Modules.functions import requests_retry_session
from Modules.functions import system_message_handler
session = requests.session()
if config.cookies is not None: # add cookies if present
cookies = cookielib.MozillaCookieJar(config.cookies)
cookies.load()
session.cookies = cookies
def download(path):
response = requests_retry_session(session=session).get(
f"{config.BASE_URL}{path}"
)
response = requests_retry_session().get(f"{config.BASE_URL}{path}")
s = BeautifulSoup(response.text, "html.parser")
# System messages
@@ -32,7 +22,7 @@ def download(path):
image = s.find(class_="download").find("a").attrs.get("href")
except AttributeError:
print(
f"{config.ERROR_COLOR}uncessesful download of {config.BASE_URL}{path}{config.END}"
f"{config.ERROR_COLOR}unsuccessful download of {config.BASE_URL}{path}{config.END}"
)
download(path)
return True
@@ -40,10 +30,14 @@ def download(path):
filename = sanitize_filename(image.split("/")[-1:][0])
author = (
s.find(class_="submission-id-sub-container").find("a").find("strong").text
s.find(class_="submission-id-sub-container")
.find("a")
.find("strong")
.text.replace(".", "._")
)
title = sanitize_filename(
s.find(class_="submission-title").find("p").contents[0]
str(s.find(class_="submission-title").find("p").contents[0])
)
view_id = int(path.split("/")[-2:-1][0])
@@ -70,18 +64,19 @@ def download(path):
image_url = f"https:{image}"
if download_file(image_url, output_path, f"{title} - [{rating}]") is True:
if (
download_file(
image_url, f"{config.BASE_URL}{path}", output_path, f"{title} - [{rating}]"
)
is True
):
with open(
f"{config.output_folder}/index.idx", encoding="utf-8", mode="a+"
) as idx:
idx.write(f"({view_id})\n")
if config.metadata is True:
dsc = (
s.find(class_="submission-description")
.text.strip()
.replace("\r\n", "\n")
)
dsc = s.find(class_="submission-description").text.strip().replace("\r\n", "\n")
if config.json_description is True:
dsc = []
data = {
@@ -98,9 +93,7 @@ def download(path):
"species": s.find(class_="info").findAll("div")[2].find("span").text,
"gender": s.find(class_="info").findAll("div")[3].find("span").text,
"views": int(s.find(class_="views").find(class_="font-large").text),
"favorites": int(
s.find(class_="favorites").find(class_="font-large").text
),
"favorites": int(s.find(class_="favorites").find(class_="font-large").text),
"rating": rating,
"comments": [],
}
@@ -114,17 +107,17 @@ def download_file(url, fname, desc):
return True
def download_file(url, fname, desc):
def download_file(url, view_url, file_name, desc):
try:
r = session.get(url, stream=True)
r = requests_retry_session().get(url, stream=True)
if r.status_code != 200:
print(
f'{config.ERROR_COLOR}Got a HTTP {r.status_code} while downloading \
"{fname}". URL {url} ...skipping{config.END}'
"{file_name}" ({view_url}) ...skipping{config.END}'
)
return False
total = int(r.headers.get("Content-Length", 0))
with open(fname, "wb") as file, tqdm(
with open(file_name, "wb") as file, tqdm(
desc=desc.ljust(40),
total=total,
miniters=100,
@@ -137,7 +130,7 @@ def download_file(url, fname, desc):
bar.update(size)
except KeyboardInterrupt:
print(f"{config.SUCCESS_COLOR}Finished downloading{config.END}")
os.remove(fname)
os.remove(file_name)
exit()
return True
@@ -155,7 +148,7 @@ def create_metadata(output, data, s, title, filename):
for desc in s.find("div", class_="submission-description").stripped_strings:
data["description"].append(desc)
# Extact tags
# Extract tags
try:
for tag in s.find(class_="tags-row").findAll(class_="tags"):
@@ -194,7 +187,7 @@ def file_exists_fallback(author, title, view_id):
f'fallback: {config.SUCCESS_COLOR}Downloaded all recent files of \
"{author}"{config.END}'
)
raise download_complete
raise DownloadComplete
print(
f'fallback: {config.WARN_COLOR}Skipping "{title}" since \
it\'s already downloaded{config.END}'
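The download_file refactor above threads the submission page URL (view_url) into the error message, so a skipped download can be traced back to its post rather than only to the raw file URL. The streaming itself is the usual requests-plus-tqdm pattern; a self-contained sketch under generic names:

import requests
from tqdm import tqdm

def fetch(url, file_name):
    # Stream the body in chunks so large files never sit fully in memory;
    # tqdm draws a byte progress bar from the Content-Length header.
    response = requests.get(url, stream=True)
    total = int(response.headers.get("Content-Length", 0))
    with open(file_name, "wb") as file, tqdm(total=total, unit="B", unit_scale=True) as bar:
        for chunk in response.iter_content(chunk_size=8192):
            bar.update(file.write(chunk))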

View file

@@ -9,14 +9,6 @@ from urllib3.util import Retry
import Modules.config as config
session = requests.session()
if config.cookies is not None: # add cookies if present
cookies = cookielib.MozillaCookieJar(config.cookies)
cookies.load()
session.cookies = cookies
session.headers.update({"User-Agent": config.user_agent})
def requests_retry_session(
retries=3,
@@ -24,7 +16,13 @@ def requests_retry_session(
status_forcelist=(500, 502, 504, 104),
session=None,
):
"""Get a session, and retry in case of an error"""
session = session or requests.Session()
if config.cookies is not None: # add cookies if present
cookies = cookielib.MozillaCookieJar(config.cookies)
cookies.load()
session.cookies = cookies
session.headers.update({"User-Agent": config.user_agent})
retry = Retry(
total=retries,
read=retries,
@@ -38,11 +36,12 @@ def requests_retry_session(
return session
class download_complete(Exception):
class DownloadComplete(Exception):
pass
def check_filter(title):
"""Compare post title and search string, then return 'True' if match found"""
match = re.search(
config.search,
@@ -56,6 +55,7 @@ def check_filter(title):
def system_message_handler(s):
"""Parse and return system message text"""
try:
message = (
s.find(class_="notice-message")
@@ -78,18 +78,19 @@ def system_message_handler(s):
.text.strip()
)
print(f"{config.WARN_COLOR}System Message: {message}{config.END}")
raise download_complete
raise DownloadComplete
def login():
"""Get cookies from any browser with logged in furaffinity and save them to file"""
session = requests.Session()
cj = browser_cookie3.load()
CJ = browser_cookie3.load()
response = session.get(config.BASE_URL, cookies=cj)
fa_cookies = cj._cookies[".furaffinity.net"]["/"]
response = session.get(config.BASE_URL, cookies=CJ)
FA_COOKIES = CJ._cookies[".furaffinity.net"]["/"]
cookie_a = FA_COOKIES["a"]
cookie_b = FA_COOKIES["b"]
cookie_a = fa_cookies["a"]
cookie_b = fa_cookies["b"]
s = BeautifulSoup(response.text, "html.parser")
try:
@@ -116,48 +117,51 @@ furaffinity in your browser, or you can export cookies.txt manually{config.END}"
def next_button(page_url):
response = session.get(page_url)
"""Parse Next button and get next page url"""
response = requests_retry_session().get(page_url)
s = BeautifulSoup(response.text, "html.parser")
if config.submissions is True:
# unlike galleries that are sequentially numbered, submissions use a different scheme.
# the "page_num" is instead: new~[set of numbers]@(12 or 48 or 72) if sorting by new
try:
next_button = s.find("a", class_="button standard more").attrs.get("href")
parse_next_button = s.find("a", class_="button standard more").attrs.get(
"href"
)
except AttributeError:
try:
next_button = s.find("a", class_="button standard more-half").attrs.get(
"href"
)
parse_next_button = s.find(
"a", class_="button standard more-half"
).attrs.get("href")
except AttributeError as e:
print(f"{config.WARN_COLOR}Unable to find next button{config.END}")
raise download_complete from e
page_num = next_button.split("/")[-2]
raise DownloadComplete from e
page_num = parse_next_button.split("/")[-2]
elif config.category != "favorites":
next_button = s.find("button", class_="button standard", text="Next")
if next_button is None or next_button.parent is None:
parse_next_button = s.find("button", class_="button standard", text="Next")
if parse_next_button is None or parse_next_button.parent is None:
print(f"{config.WARN_COLOR}Unable to find next button{config.END}")
raise download_complete
page_num = next_button.parent.attrs["action"].split("/")[-2]
raise DownloadComplete
page_num = parse_next_button.parent.attrs["action"].split("/")[-2]
else:
next_button = s.find("a", class_="button standard right", text="Next")
page_num = fav_next_button(s)
parse_next_button = s.find("a", class_="button standard right", text="Next")
page_num = fav_next_button(parse_next_button)
print(
f"Downloading page {page_num} - {config.BASE_URL}{next_button.parent.attrs['action']}"
f"Downloading page {page_num} - {config.BASE_URL}{parse_next_button.parent.attrs['action']}"
)
return page_num
def fav_next_button():
def fav_next_button(parse_next_button):
# unlike galleries that are sequentially numbered, favorites use a different scheme.
# the "page_num" is instead: [set of numbers]/next (the trailing /next is required)
if next_button is None:
if parse_next_button is None:
print(f"{config.WARN_COLOR}Unable to find next button{config.END}")
raise download_complete
next_page_link = next_button.attrs["href"]
raise DownloadComplete
next_page_link = parse_next_button.attrs["href"]
next_fav_num = re.search(r"\d+", next_page_link)
if next_fav_num is None:
print(f"{config.WARN_COLOR}Failed to parse next favorite link{config.END}")
raise download_complete
raise DownloadComplete
return f"{next_fav_num[0]}/next"

View file

@@ -9,8 +9,8 @@ import Modules.config as config
@lru_cache(maxsize=None)
def start_indexing(path, layer=0):
"""Recursively iterate over each item in path
and print item's name.
"""Recursively iterate over each item in path, then
save and print item's name.
"""
# make Path object from input string
@@ -23,7 +23,7 @@ def start_indexing(path, layer=0):
if p.is_file():
name = p.stem
ext = p.suffix
match = re.search(r"\([0-9]{5,}\)", name)
match = re.search(r"\(\d{5,}\)", name)
if match is None and ext not in [".txt", ".idx"]:
return
@@ -39,6 +39,7 @@ def start_indexing(path, layer=0):
@lru_cache(maxsize=None)
def check_file(path):
"""compare file view id with index list"""
view_id = path.split("/")[-2:-1][0]
with contextlib.suppress(FileNotFoundError):
with open(f"{config.output_folder}/index.idx", encoding="utf-8") as idx: