From 675f558d03c30792b3da977991487ff561607e64 Mon Sep 17 00:00:00 2001
From: Radiquum
Date: Sun, 10 Jul 2022 03:24:39 +0500
Subject: [PATCH] changelog:

ability to add comments in the username list with "#"
automatic removal of "_" in usernames
speed up filter checking
add basic indexing -> speeds up the existing-file check when fetching newer files
other small changes
---
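Notes:

A quick way to see the new username-list handling in action. This is a
stand-alone re-creation of the parsing done at the bottom of furaffinity-dl.py,
not the shipped code; the sample lines are made up, and username_replace_chars
matches the dict added to Modules/config.py:

    # sketch: how username-list lines are cleaned before downloading
    username_replace_chars = {" ": "", "_": ""}

    sample_lines = [
        "radiquum          # everything after '#' is treated as a comment",
        "some_user name    # '_' and spaces are stripped -> 'someusername'",
        "# a fully commented-out line reduces to '' and is skipped",
    ]

    for line in sample_lines:
        username = line.split("#")[0].translate(str.maketrans(username_replace_chars))
        if username != "":
            print(username)  # prints: radiquum, someusername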
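The faster skip of already-downloaded files relies on the new on-disk naming,
which embeds the view id; the old name without the id is kept as a fallback so
files saved by previous versions are still detected. A sketch with made-up
values ("Output", "someauthor", etc. are assumptions, not the real defaults):

    # sketch: the two file names download() now considers
    author, title, view_id, filename = "someauthor", "Some Title", 12345678, "art.png"
    output = f"Output/{author}"
    output_path = f"{output}/{title} ({view_id}) - {filename}"  # new primary name
    output_path_fb = f"{output}/{title} - {filename}"           # pre-patch name, checked as fallback
    print(output_path)     # Output/someauthor/Some Title (12345678) - art.png
    print(output_path_fb)  # Output/someauthor/Some Title - art.png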
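The --index flag walks the output folder once and writes every existing file
path into index.idx, so later runs can skip a submission straight from the
gallery listing, without fetching its /view/ page first. A minimal sketch of
that lookup under an assumed index path (re.escape() is added here for safety;
the shipped check_file() searches the raw id):

    import re

    def check_file(path, index_file="Output/index.idx"):
        view_id = path.split("/")[-2:-1][0]  # "/view/12345678/" -> "12345678"
        try:
            with open(index_file, encoding="utf-8") as idx:
                return re.search(re.escape(view_id), idx.read()) is not None
        except FileNotFoundError:
            return False  # no index yet -> nothing is skipped

    print(check_file("/view/12345678/"))  # True once the id appears in index.idx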
 Modules/config.py    |  38 ++++++++++-
 Modules/download.py  | 156 +++++++++++++++++++++++--------------------
 Modules/functions.py |  60 +++++++++--------
 Modules/index.py     |  37 ++++++++++
 furaffinity-dl.py    |  91 +++++++++++++++++++------
 5 files changed, 260 insertions(+), 122 deletions(-)
 create mode 100644 Modules/index.py

diff --git a/Modules/config.py b/Modules/config.py
index 0b4ce34..f12a5c1 100644
--- a/Modules/config.py
+++ b/Modules/config.py
@@ -72,7 +72,10 @@ parser.add_argument(
     Folder-Name-Here",
 )
 parser.add_argument(
-    "-s", "--start", default=1, help="page number to start from",
+    "-s",
+    "--start",
+    default=1,
+    help="page number to start from",
 )
 parser.add_argument(
     "-S",
@@ -115,7 +118,7 @@ parser.add_argument(
 )
 parser.add_argument(
     "--download",
-    help="download a specific submission /view/12345678/",
+    help="download a specific submission by providing its id",
 )
 parser.add_argument(
     "-jd",
@@ -129,6 +132,11 @@ parser.add_argument(
     action="store_true",
     help="extract furaffinity cookies directly from your browser",
 )
+parser.add_argument(
+    "--index",
+    action="store_true",
+    help="create an index of downloaded files in an output folder",
+)
 
 args = parser.parse_args()
 
@@ -136,7 +144,7 @@ args = parser.parse_args()
 username = args.username
 category = args.category
 
-if username != None:
+if username is not None:
     username = username.split(" ")
 
 # Custom input
@@ -153,6 +161,7 @@ folder = args.folder
 
 login = args.login
 check = args.check
+index = args.index
 submissions = args.submissions
 json_description = args.json_description
 metadata = args.metadata
@@ -168,3 +177,26 @@ END = "\033[0m"
 
 # Globals
 BASE_URL = "https://www.furaffinity.net"
+username_replace_chars = {
+    " ": "",
+    "_": "",
+}
+search = 'YCH[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
+|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
+|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE\
+|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
+|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*ABLE\
+|AVAIL[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
+|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLONE\
+|CLONE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
+|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*LIM\
+|LIM[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
+|COM[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
+|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*COM\
+|COM[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE[^r]\
+|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*COM\
+|FIX[a-z $-/:-?{-~!"^_`\\[\\]]*ICE\
+|TELEGRAM[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
+|TG[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
+|REM[insder]*\\b\
+|\\bREF|\\bSale|auction|multislot|stream|adopt'
diff --git a/Modules/download.py b/Modules/download.py
index 0b94607..43b9b4b 100644
--- a/Modules/download.py
+++ b/Modules/download.py
@@ -1,12 +1,16 @@
+import http.cookiejar as cookielib
 import json
-from tqdm import tqdm
-from pathvalidate import sanitize_filename
-import Modules.config as config
 import os
+
 import requests
 from bs4 import BeautifulSoup
-import http.cookiejar as cookielib
-from Modules.functions import system_message_handler, check_filter, download_complete
+from pathvalidate import sanitize_filename
+from tqdm import tqdm
+
+import Modules.config as config
+from Modules.functions import download_complete
+from Modules.functions import requests_retry_session
+from Modules.functions import system_message_handler
 
 session = requests.session()
 if config.cookies is not None:  # add cookies if present
@@ -14,8 +18,10 @@ if config.cookies is not None:  # add cookies if present
     cookies.load()
     session.cookies = cookies
 
+
 def download(path):
-    response = session.get(f"{config.BASE_URL}{path}")
+
+    response = requests_retry_session(session=session).get(f"{config.BASE_URL}{path}")
     s = BeautifulSoup(response.text, "html.parser")
 
     # System messages
@@ -23,78 +29,71 @@ def download(path):
         system_message_handler(s)
 
     image = s.find(class_="download").find("a").attrs.get("href")
-    title = s.find(class_="submission-title").find("p").contents[0]
-    title = sanitize_filename(title)
-    dsc = s.find(class_="submission-description").text.strip().replace("\r\n", "\n")
+    filename = sanitize_filename(image.split("/")[-1:][0])
 
-    if config.json_description is True:
-        dsc = []
-    filename = image.split("/")[-1:][0]
-    data = {
-        "id": int(path.split("/")[-2:-1][0]),
-        "filename": filename,
-        "author": s.find(class_="submission-id-sub-container")
-        .find("a")
-        .find("strong")
-        .text,
-        "date": s.find(class_="popup_date").attrs.get("title"),
-        "title": title,
-        "description": dsc,
-        "url": f"{config.BASE_URL}{path}",
-        "tags": [],
-        "category": s.find(class_="info").find(class_="category-name").text,
-        "type": s.find(class_="info").find(class_="type-name").text,
-        "species": s.find(class_="info").findAll("div")[2].find("span").text,
-        "gender": s.find(class_="info").findAll("div")[3].find("span").text,
-        "views": int(s.find(class_="views").find(class_="font-large").text),
-        "favorites": int(s.find(class_="favorites").find(class_="font-large").text),
-        "rating": s.find(class_="rating-box").text.strip(),
-        "comments": [],
-    }
-    if config.submission_filter is True and check_filter(title) is True:
-        print(
-            f'{config.WARN_COLOR}"{title}" was filtered and will not be \
-downloaded - {data.get("url")}{config.END}'
-        )
-        return True
+    author = s.find(class_="submission-id-sub-container").find("a").find("strong").text
+    title = sanitize_filename(s.find(class_="submission-title").find("p").contents[0])
+    view_id = int(path.split("/")[-2:-1][0])
+
+    output = f"{config.output_folder}/{author}"
+    rating = s.find(class_="rating-box").text.strip()
+
+    if config.category != "gallery":
+        output = f"{config.output_folder}/{author}/{config.category}"
+    if config.folder is not None:
+        output = f"{config.output_folder}/{author}/{config.folder}"
+    os.makedirs(output, exist_ok=True)
+
+    output_path = f"{output}/{title} ({view_id}) - {filename}"
+    output_path_fb = f"{output}/{title} - {filename}"
+    if config.rating is True:
+        os.makedirs(f"{output}/{rating}", exist_ok=True)
+        output_path = f"{output}/{rating}/{title} ({view_id}) - {filename}"
+        output_path_fb = f"{output}/{rating}/{title} - {filename}"
+
+    if config.dont_redownload is True and os.path.isfile(output_path_fb):
+        return file_exists_fallback(author, title)
 
     image_url = f"https:{image}"
-    output = f"{config.output_folder}/{data.get('author')}"
-    if config.category != "gallery":
-        output = f"{config.output_folder}/{data.get('author')}/{config.category}"
-    if config.folder is not None:
-        output = f"{config.output_folder}/{data.get('author')}/{config.folder}"
-    os.makedirs(output, exist_ok=True)
-    filename = sanitize_filename(filename)
-    output_path = f"{output}/{title} - {filename}"
-    if config.rating is True:
-        os.makedirs(f'{output}/{data.get("rating")}', exist_ok=True)
-        output_path = f'{output}/{data.get("rating")}/{title} - {filename}'
-
-    if config.dont_redownload is True and os.path.isfile(output_path):
-        if config.check is True:
-            print(
-                f"{config.SUCCESS_COLOR}Downloaded all recent files of \"{data.get('author')}\"{config.END}"
-            )
-            raise download_complete
-        print(
-            f'{config.WARN_COLOR}Skipping "{title}" since it\'s already downloaded{config.END}'
-        )
-        return True
-    else:
-        download_file(
-            image_url,
-            output_path,
-            f'{title} - \
-[{data.get("rating")}]',
-        )
+    download_file(
+        image_url,
+        output_path,
+        f"{title} - \
+[{rating}]",
+    )
 
     if config.metadata is True:
+        dsc = s.find(class_="submission-description").text.strip().replace("\r\n", "\n")
+        if config.json_description is True:
+            dsc = []
+        data = {
+            "id": view_id,
+            "filename": filename,
+            "author": author,
+            "date": s.find(class_="popup_date").attrs.get("title"),
+            "title": title,
+            "description": dsc,
+            "url": f"{config.BASE_URL}{path}",
+            "tags": [],
+            "category": s.find(class_="info").find(class_="category-name").text,
+            "type": s.find(class_="info").find(class_="type-name").text,
+            "species": s.find(class_="info").findAll("div")[2].find("span").text,
+            "gender": s.find(class_="info").findAll("div")[3].find("span").text,
+            "views": int(s.find(class_="views").find(class_="font-large").text),
+            "favorites": int(s.find(class_="favorites").find(class_="font-large").text),
+            "rating": rating,
+            "comments": [],
+        }
         create_metadata(output, data, s, title, filename)
 
     if config.download is not None:
-        print(f'{config.SUCCESS_COLOR}File saved as "{output_path}" {config.END}')
+        print(
+            f'{config.SUCCESS_COLOR}File saved as \
+"{output_path}" {config.END}'
+        )
+    return True
+
 
 def download_file(url, fname, desc):
     try:
         r = session.get(url, stream=True)
@@ -121,7 +120,8 @@ def download_file(url, fname, desc):
             os.remove(fname)
             exit()
     return True
-
+
+
 def create_metadata(output, data, s, title, filename):
     if config.rating is True:
         os.makedirs(f'{output}/{data.get("rating")}/metadata', exist_ok=True)
@@ -163,4 +163,18 @@ def create_metadata(output, data, s, title, filename):
 
     # Write a UTF-8 encoded JSON file for metadata
     with open(f"{metadata}.json", "w", encoding="utf-8") as f:
-        json.dump(data, f, ensure_ascii=False, indent=4)
\ No newline at end of file
+        json.dump(data, f, ensure_ascii=False, indent=4)
+
+
+def file_exists_fallback(author, title):
+    if config.check is True:
+        print(
+            f'fallback: {config.SUCCESS_COLOR}Downloaded all recent files of \
+"{author}"{config.END}'
+        )
+        raise download_complete
+    print(
+        f'fallback: {config.WARN_COLOR}Skipping "{title}" since \
+it\'s already downloaded{config.END}'
+    )
+    return True
diff --git a/Modules/functions.py b/Modules/functions.py
index 38da023..4320826 100644
--- a/Modules/functions.py
+++ b/Modules/functions.py
@@ -4,6 +4,8 @@ import re
 import browser_cookie3
 import requests
 from bs4 import BeautifulSoup
+from requests.adapters import HTTPAdapter
+from urllib3.util import Retry
 
 import Modules.config as config
 
@@ -13,39 +15,43 @@ if config.cookies is not None:  # add cookies if present
     cookies.load()
     session.cookies = cookies
 
+session.headers.update({"User-Agent": config.user_agent})
+
+
+def requests_retry_session(
+    retries=3,
+    backoff_factor=0.3,
+    status_forcelist=(500, 502, 504, 104),
+    session=None,
+):
+    session = session or requests.Session()
+    retry = Retry(
+        total=retries,
+        read=retries,
+        connect=retries,
+        backoff_factor=backoff_factor,
+        status_forcelist=status_forcelist,
+    )
+    adapter = HTTPAdapter(max_retries=retry)
+    session.mount("http://", adapter)
+    session.mount("https://", adapter)
+    return session
+
 
 class download_complete(Exception):
     pass
 
 
 def check_filter(title):
-    search = 'YCH[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
-|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
-|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE\
-|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
-|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*ABLE\
-|AVAIL[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
-|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLONE\
-|CLONE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
-|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*LIM\
-|LIM[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
-|COM[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
-|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*COM\
-|COM[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE[^r]\
-|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*COM\
-|FIX[a-z $-/:-?{-~!"^_`\\[\\]]*ICE\
-|TELEGRAM[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
-|TG[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
-|REM[insder]*\\b\
-|\\bREF|\\bSale|auction|multislot|stream|adopt'
     match = re.search(
-        search,
+        config.search,
         title,
         re.IGNORECASE,
     )
     if match is not None and title == match.string:
         return True
+    return None
 
 
@@ -68,9 +74,7 @@ def system_message_handler(s):
     raise download_complete
 
 
-def login(user_agent):
-
-    session.headers.update({"User-Agent": user_agent})
+def login():
 
     CJ = browser_cookie3.load()
 
@@ -103,8 +107,6 @@ by using "-c cookies.txt"{config.END}'
 furaffinity in your browser, or you can export cookies.txt manually{config.END}"
         )
 
-    exit()
-
 
 def next_button(page_url):
     response = session.get(page_url)
@@ -130,15 +132,17 @@ def next_button(page_url):
             raise download_complete
         page_num = next_button.parent.attrs["action"].split("/")[-2]
     else:
-        page_num = fav_next_button(s)
+        next_button = s.find("a", class_="button standard right", text="Next")
+        page_num = fav_next_button(next_button)
     print(
         f"Downloading page {page_num} - {config.BASE_URL}/{next_button.parent.attrs['action']}"
     )
     return page_num
 
 
-def fav_next_button(s):
+def fav_next_button(next_button):
     # unlike galleries that are sequentially numbered, favorites use a different scheme.
     # the "page_num" is instead: [set of numbers]/next (the trailing /next is required)
-    next_button = s.find("a", class_="button standard right", text="Next")
     if next_button is None:
         print(f"{config.WARN_COLOR}Unable to find next button{config.END}")
         raise download_complete
diff --git a/Modules/index.py b/Modules/index.py
new file mode 100644
index 0000000..57fc959
--- /dev/null
+++ b/Modules/index.py
@@ -0,0 +1,37 @@
+import contextlib
+import re
+from pathlib import Path
+
+import Modules.config as config
+
+
+def start_indexing(path, layer=0):
+    """Recursively iterate over each item in path
+    and write each file's path to the index.
+    """
+
+    # make Path object from input string
+    path = Path(path)
+    with open(f"{config.output_folder}/index.idx", encoding="utf-8", mode="a+") as idx:
+
+        # iter the directory
+        for p in path.iterdir():
+
+            if p.is_file():
+                idx.write(f"{p}\n")
+
+            elif p.is_dir():
+                start_indexing(p, layer + 1)
+
+            else:
+                raise FileNotFoundError()
+
+
+def check_file(path):
+    view_id = path.split("/")[-2:-1][0]
+    with contextlib.suppress(FileNotFoundError):
+        with open(f"{config.output_folder}/index.idx", encoding="utf-8") as idx:
+            index = idx.read()
+            match = re.search(view_id, index)
+            if match is not None:
+                return True
diff --git a/furaffinity-dl.py b/furaffinity-dl.py
index 5118f2c..234d235 100644
--- a/furaffinity-dl.py
+++ b/furaffinity-dl.py
@@ -9,10 +9,14 @@ from bs4 import BeautifulSoup
 
 import Modules.config as config
 from Modules.download import download
+from Modules.functions import check_filter
 from Modules.functions import download_complete
 from Modules.functions import login
 from Modules.functions import next_button
+from Modules.functions import requests_retry_session
 from Modules.functions import system_message_handler
+from Modules.index import check_file
+from Modules.index import start_indexing
 
 # get session
 session = requests.session()
@@ -31,12 +35,13 @@ def main():
     while True:
         if config.stop == page_num:
             print(
-                f'{config.WARN_COLOR}Reached page "{config.stop}", stopping.{config.END}'
+                f'{config.WARN_COLOR}Reached page "{config.stop}", \
+stopping.{config.END}'
             )
             break
 
         page_url = f"{download_url}/{page_num}"
-        response = session.get(page_url)
+        response = requests_retry_session(session=session).get(page_url)
         s = BeautifulSoup(response.text, "html.parser")
 
         # System messages
@@ -50,7 +55,30 @@ def main():
 
         # Download all images on the page
         for img in s.findAll("figure"):
-            download(img.find("a").attrs.get("href"))
+            title = img.find("figcaption").contents[0].text
+            img_url = img.find("a").attrs.get("href")
+
+            if config.submission_filter is True and check_filter(title) is True:
+                print(
+                    f'{config.WARN_COLOR}"{title}" was filtered and will not be \
+downloaded - {config.BASE_URL}{img_url}{config.END}'
+                )
+                continue
+
+            if config.dont_redownload is True and check_file(img_url) is True:
+                if config.check is True:
+                    print(
+                        f'{config.SUCCESS_COLOR}Downloaded all recent files of \
+"{config.username[0]}"{config.END}'
+                    )
+                    raise download_complete
+                print(
+                    f'{config.WARN_COLOR}Skipping "{title}" since \
+it\'s already downloaded{config.END}'
+                )
+                continue
+
+            download(img_url)
             sleep(config.interval)
 
         page_num = next_button(page_url)
@@ -58,13 +86,18 @@ def main():
 
 if __name__ == "__main__":
     if config.login is True:
-        login(config.user_agent)
+        login()
+        exit()
+
+    if config.index is True:
+        if os.path.isfile(f"{config.output_folder}/index.idx"):
+            os.remove(f"{config.output_folder}/index.idx")
+        start_indexing(config.output_folder)
+        print(f"{config.SUCCESS_COLOR}indexing finished{config.END}")
+        exit()
 
     try:
-        response = session.get(config.BASE_URL)
-    except ConnectionError:
-        print(f"{config.ERROR_COLOR}Connection failed{config.END}")
-        exit()
+        response = requests_retry_session(session=session).get(config.BASE_URL)
     except KeyboardInterrupt:
         print(f"{config.WARN_COLOR}Aborted by user{config.END}")
         exit()
@@ -72,14 +105,18 @@ if __name__ == "__main__":
     s = BeautifulSoup(response.text, "html.parser")
     if s.find(class_="loggedin_user_avatar") is not None:
         account_username = s.find(class_="loggedin_user_avatar").attrs.get("alt")
-        print(f'{config.SUCCESS_COLOR}Logged in as "{account_username}"{config.END}')
+        print(
+            f'{config.SUCCESS_COLOR}Logged in as \
+"{account_username}"{config.END}'
+        )
     else:
         print(
-            f"{config.WARN_COLOR}Not logged in, NSFW content is inaccessible{config.END}"
+            f"{config.WARN_COLOR}Not logged in, NSFW content \
+is inaccessible{config.END}"
         )
 
     if config.download is not None:
-        download(config.download)
+        download(f"/view/{config.download}/")
         exit()
 
     if config.submissions is True:
@@ -109,15 +146,29 @@ downloading "{config.folder[1]}"{config.END}'
         )
         exit()
 
-    if os.path.exists(config.username[0]):
-        data = open(config.username[0]).read()
-        config.username = filter(None, data.split("\n"))
+    try:
+        if os.path.exists(config.username[0]):
+            data = open(config.username[0]).read()
+            config.username = filter(None, data.split("\n"))
+    except (TypeError, AttributeError):
+        print(
+            f"{config.ERROR_COLOR}Please enter a username \
+or provide a file with usernames (1 username per line){config.END}"
+        )
+        exit()
 
     for username in config.username:
-        print(f'{config.SUCCESS_COLOR}Now downloading "{username}"{config.END}')
-        download_url = f"{config.BASE_URL}/{config.category}/{username}"
-        main()
-        print(
-            f'{config.SUCCESS_COLOR}Finished \
-downloading "{username}"{config.END}'
-        )
+        username = username.split("#")[0].translate(
+            str.maketrans(config.username_replace_chars)
+        )
+        if username != "":
+            print(f'{config.SUCCESS_COLOR}Now downloading "{username}"{config.END}')
+            download_url = f"{config.BASE_URL}/{config.category}/{username}"
+            main()
+            print(
+                f'{config.SUCCESS_COLOR}Finished \
+downloading "{username}"{config.END}'
+            )
+    if os.path.isfile(f"{config.output_folder}/index.idx"):
+        os.remove(f"{config.output_folder}/index.idx")
+    start_indexing(config.output_folder)