diff --git a/.gitignore b/.gitignore index b6321ad..dc0ada2 100644 --- a/.gitignore +++ b/.gitignore @@ -11,7 +11,9 @@ cookies.txt # Default download folder Submissions/ -# vscode stuff -.vscode +#Dev stuff list.txt +.vscode +.idea +venv __pycache__ diff --git a/Modules/config.py b/Modules/config.py index f12a5c1..ef8959d 100644 --- a/Modules/config.py +++ b/Modules/config.py @@ -1,4 +1,5 @@ import argparse +import os parser = argparse.ArgumentParser( formatter_class=argparse.RawTextHelpFormatter, @@ -28,7 +29,8 @@ parser.add_argument( "username", nargs="?", help="username of the furaffinity \ -user", +user (if username is starting with '-' or '--' \ +provide them through a file instead)", ) parser.add_argument( "category", @@ -36,14 +38,16 @@ parser.add_argument( help="the category to download, gallery/scraps/favorites \ [default: gallery]", default="gallery", + type=str, ) -parser.add_argument("-c", "--cookies", help="path to a NetScape cookies file") +parser.add_argument("--cookies", "-c", help="path to a NetScape cookies file", type=str) parser.add_argument( "--output", "-o", dest="output_folder", default="Submissions", help="set a custom output folder", + type=str, ) parser.add_argument( "--check", @@ -51,56 +55,53 @@ parser.add_argument( help="check and download latest submissions of a user", ) parser.add_argument( - "-ua", "--user-agent", + "-ua", dest="user_agent", default="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 \ Firefox/101.0", - help="Your browser's useragent, may be required, depending on your luck", + help="Your browser's user agent, may be required, depending on your luck", + type=str, ) parser.add_argument( - "-sub", "--submissions", + "-sub", action="store_true", help="download your \ submissions", ) parser.add_argument( - "-f", "--folder", + "-f", help="full path of the furaffinity gallery folder. 
for instance 123456/\ Folder-Name-Here", + type=str, ) +parser.add_argument("--start", default=1, help="page number to start from", type=str) parser.add_argument( - "-s", - "--start", - default=1, - help="page number to start from", -) -parser.add_argument( - "-S", "--stop", default=0, help="Page number to stop on. Specify the full URL after the username: for \ favorites pages (1234567890/next) or for submissions pages: \ (new~123456789@48)", + type=str, ) parser.add_argument( - "-rd", "--redownload", + "-rd", action="store_false", help="Redownload files that have been downloaded already", ) parser.add_argument( - "-i", "--interval", - type=int, + "-i", default=0, help="delay between downloading pages in seconds [default: 0]", + type=int, ) parser.add_argument( - "-r", "--rating", + "-r", action="store_false", help="disable rating separation", ) @@ -111,18 +112,17 @@ parser.add_argument( help="enable submission filter", ) parser.add_argument( - "-m", "--metadata", + "-m", action="store_true", help="enable metadata saving", ) parser.add_argument( - "--download", - help="download a specific submission by providing its id", + "--download", help="download a specific submission by providing its id", type=str ) parser.add_argument( - "-jd", "--json-description", + "-jd", dest="json_description", action="store_true", help="download description as a JSON list", @@ -147,6 +147,10 @@ category = args.category if username is not None: username = username.split(" ") + if os.path.exists(username[0]): + data = open(username[0]).read() + username = filter(None, data.split("\n")) + # Custom input cookies = args.cookies output_folder = args.output_folder @@ -199,4 +203,4 @@ search = 'YCH[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\ |TELEGRAM[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\ |TG[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\ |REM[insder]*\\b\ -|\\bREF|\\bSale|auction|multislot|stream|adopt' +|\\bREF|\\bSale|auction|multislot|multi slot|stream|adopt' diff --git a/Modules/download.py b/Modules/download.py index 
e2251d1..4b8e1e5 100644 --- a/Modules/download.py +++ b/Modules/download.py @@ -1,28 +1,18 @@ -import http.cookiejar as cookielib import json import os -import requests from bs4 import BeautifulSoup from pathvalidate import sanitize_filename from tqdm import tqdm import Modules.config as config -from Modules.functions import download_complete +from Modules.functions import DownloadComplete from Modules.functions import requests_retry_session from Modules.functions import system_message_handler -session = requests.session() -if config.cookies is not None: # add cookies if present - cookies = cookielib.MozillaCookieJar(config.cookies) - cookies.load() - session.cookies = cookies - def download(path): - response = requests_retry_session(session=session).get( - f"{config.BASE_URL}{path}" - ) + response = requests_retry_session().get(f"{config.BASE_URL}{path}") s = BeautifulSoup(response.text, "html.parser") # System messages @@ -32,7 +22,7 @@ def download(path): image = s.find(class_="download").find("a").attrs.get("href") except AttributeError: print( - f"{config.ERROR_COLOR}uncessesful download of {config.BASE_URL}{path}{config.END}" + f"{config.ERROR_COLOR}unsuccessful download of {config.BASE_URL}{path}{config.END}" ) download(path) return True @@ -40,10 +30,14 @@ def download(path): filename = sanitize_filename(image.split("/")[-1:][0]) author = ( - s.find(class_="submission-id-sub-container").find("a").find("strong").text + s.find(class_="submission-id-sub-container") + .find("a") + .find("strong") + .text.replace(".", "._") ) + title = sanitize_filename( - s.find(class_="submission-title").find("p").contents[0] + str(s.find(class_="submission-title").find("p").contents[0]) ) view_id = int(path.split("/")[-2:-1][0]) @@ -70,18 +64,19 @@ def download(path): image_url = f"https:{image}" - if download_file(image_url, output_path, f"{title} - [{rating}]") is True: + if ( + download_file( + image_url, f"{config.BASE_URL}{path}", output_path, f"{title} - [{rating}]" + ) 
+ is True + ): with open( f"{config.output_folder}/index.idx", encoding="utf-8", mode="a+" ) as idx: idx.write(f"({view_id})\n") if config.metadata is True: - dsc = ( - s.find(class_="submission-description") - .text.strip() - .replace("\r\n", "\n") - ) + dsc = s.find(class_="submission-description").text.strip().replace("\r\n", "\n") if config.json_description is True: dsc = [] data = { @@ -98,9 +93,7 @@ def download(path): "species": s.find(class_="info").findAll("div")[2].find("span").text, "gender": s.find(class_="info").findAll("div")[3].find("span").text, "views": int(s.find(class_="views").find(class_="font-large").text), - "favorites": int( - s.find(class_="favorites").find(class_="font-large").text - ), + "favorites": int(s.find(class_="favorites").find(class_="font-large").text), "rating": rating, "comments": [], } @@ -114,17 +107,17 @@ def download(path): return True -def download_file(url, fname, desc): +def download_file(url, view_url, file_name, desc): try: - r = session.get(url, stream=True) + r = requests_retry_session().get(url, stream=True) if r.status_code != 200: print( f'{config.ERROR_COLOR}Got a HTTP {r.status_code} while downloading \ -"{fname}". 
URL {url} ...skipping{config.END}' +"{file_name}" ({view_url}) ...skipping{config.END}' ) return False total = int(r.headers.get("Content-Length", 0)) - with open(fname, "wb") as file, tqdm( + with open(file_name, "wb") as file, tqdm( desc=desc.ljust(40), total=total, miniters=100, @@ -137,7 +130,7 @@ def download_file(url, fname, desc): bar.update(size) except KeyboardInterrupt: print(f"{config.SUCCESS_COLOR}Finished downloading{config.END}") - os.remove(fname) + os.remove(file_name) exit() return True @@ -155,7 +148,7 @@ def create_metadata(output, data, s, title, filename): for desc in s.find("div", class_="submission-description").stripped_strings: data["description"].append(desc) - # Extact tags + # Extract tags try: for tag in s.find(class_="tags-row").findAll(class_="tags"): @@ -194,7 +187,7 @@ def file_exists_fallback(author, title, view_id): f'fallback: {config.SUCCESS_COLOR}Downloaded all recent files of \ "{author}"{config.END}' ) - raise download_complete + raise DownloadComplete print( f'fallback: {config.WARN_COLOR}Skipping "{title}" since \ it\'s already downloaded{config.END}' diff --git a/Modules/functions.py b/Modules/functions.py index 6b75481..3789bbd 100644 --- a/Modules/functions.py +++ b/Modules/functions.py @@ -9,14 +9,6 @@ from urllib3.util import Retry import Modules.config as config -session = requests.session() -if config.cookies is not None: # add cookies if present - cookies = cookielib.MozillaCookieJar(config.cookies) - cookies.load() - session.cookies = cookies - -session.headers.update({"User-Agent": config.user_agent}) - def requests_retry_session( retries=3, @@ -24,7 +16,13 @@ def requests_retry_session( status_forcelist=(500, 502, 504, 104), session=None, ): + """Get a session, and retry in case of an error""" session = session or requests.Session() + if config.cookies is not None: # add cookies if present + cookies = cookielib.MozillaCookieJar(config.cookies) + cookies.load() + session.cookies = cookies + 
session.headers.update({"User-Agent": config.user_agent}) retry = Retry( total=retries, read=retries, @@ -38,11 +36,12 @@ def requests_retry_session( return session -class download_complete(Exception): +class DownloadComplete(Exception): pass def check_filter(title): + """Compare post title and search string, then return 'True' if match found""" match = re.search( config.search, @@ -56,6 +55,7 @@ def check_filter(title): def system_message_handler(s): + """Parse and return system message text""" try: message = { s.find(class_="notice-message") @@ -78,18 +78,19 @@ def system_message_handler(s): .text.strip() ) print(f"{config.WARN_COLOR}System Message: {message}{config.END}") - raise download_complete + raise DownloadComplete def login(): + """Get cookies from any browser with logged in furaffinity and save them to file""" + session = requests.Session() + cj = browser_cookie3.load() - CJ = browser_cookie3.load() + response = session.get(config.BASE_URL, cookies=cj) + fa_cookies = cj._cookies[".furaffinity.net"]["/"] - response = session.get(config.BASE_URL, cookies=CJ) - FA_COOKIES = CJ._cookies[".furaffinity.net"]["/"] - - cookie_a = FA_COOKIES["a"] - cookie_b = FA_COOKIES["b"] + cookie_a = fa_cookies["a"] + cookie_b = fa_cookies["b"] s = BeautifulSoup(response.text, "html.parser") try: @@ -116,48 +117,51 @@ furaffinity in your browser, or you can export cookies.txt manually{config.END}" def next_button(page_url): - response = session.get(page_url) + """Parse Next button and get next page url""" + response = requests_retry_session().get(page_url) s = BeautifulSoup(response.text, "html.parser") if config.submissions is True: # unlike galleries that are sequentially numbered, submissions use a different scheme. 
# the "page_num" is instead: new~[set of numbers]@(12 or 48 or 72) if sorting by new try: - next_button = s.find("a", class_="button standard more").attrs.get("href") + parse_next_button = s.find("a", class_="button standard more").attrs.get( + "href" + ) except AttributeError: try: - next_button = s.find("a", class_="button standard more-half").attrs.get( - "href" - ) + parse_next_button = s.find( + "a", class_="button standard more-half" + ).attrs.get("href") except AttributeError as e: print(f"{config.WARN_COLOR}Unable to find next button{config.END}") - raise download_complete from e - page_num = next_button.split("/")[-2] + raise DownloadComplete from e + page_num = parse_next_button.split("/")[-2] elif config.category != "favorites": - next_button = s.find("button", class_="button standard", text="Next") - if next_button is None or next_button.parent is None: + parse_next_button = s.find("button", class_="button standard", text="Next") + if parse_next_button is None or parse_next_button.parent is None: print(f"{config.WARN_COLOR}Unable to find next button{config.END}") - raise download_complete - page_num = next_button.parent.attrs["action"].split("/")[-2] + raise DownloadComplete + page_num = parse_next_button.parent.attrs["action"].split("/")[-2] else: - next_button = s.find("a", class_="button standard right", text="Next") - page_num = fav_next_button(s) + parse_next_button = s.find("a", class_="button standard right", text="Next") + page_num = fav_next_button(parse_next_button) print( - f"Downloading page {page_num} - {config.BASE_URL}{next_button.parent.attrs['action']}" + f"Downloading page {page_num} - {config.BASE_URL}{parse_next_button.parent.attrs['action']}" ) return page_num -def fav_next_button(): +def fav_next_button(parse_next_button): # unlike galleries that are sequentially numbered, favorites use a different scheme. 
# the "page_num" is instead: [set of numbers]/next (the trailing /next is required) - if next_button is None: + if parse_next_button is None: print(f"{config.WARN_COLOR}Unable to find next button{config.END}") - raise download_complete - next_page_link = next_button.attrs["href"] + raise DownloadComplete + next_page_link = parse_next_button.attrs["href"] next_fav_num = re.search(r"\d+", next_page_link) if next_fav_num is None: print(f"{config.WARN_COLOR}Failed to parse next favorite link{config.END}") - raise download_complete + raise DownloadComplete return f"{next_fav_num[0]}/next" diff --git a/Modules/index.py b/Modules/index.py index a90da21..016aa78 100644 --- a/Modules/index.py +++ b/Modules/index.py @@ -9,8 +9,8 @@ import Modules.config as config @lru_cache(maxsize=None) def start_indexing(path, layer=0): - """Recursively iterate over each item in path - and print item's name. + """Recursively iterate over each item in path, then + save and print item's name. """ # make Path object from input string @@ -23,7 +23,7 @@ def start_indexing(path, layer=0): if p.is_file(): name = p.stem ext = p.suffix - match = re.search(r"\([0-9]{5,}\)", name) + match = re.search(r"\(\d{5,}\)", name) if match is None and ext not in [".txt", ".idx"]: return @@ -39,6 +39,7 @@ def start_indexing(path, layer=0): @lru_cache(maxsize=None) def check_file(path): + """compare file view id with index list""" view_id = path.split("/")[-2:-1][0] with contextlib.suppress(FileNotFoundError): with open(f"{config.output_folder}/index.idx", encoding="utf-8") as idx: diff --git a/README.md b/README.md index 197b688..aa8ea89 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,20 @@ -This branch is the development version of furaffinity-dl rewritten in python. - # FurAffinity Downloader -**furaffinity-dl** is a python script for batch downloading of galleries (and scraps/favourites) from furaffinity users users or your submissons! 
-It was written for preservation of culture, to counter the people nuking their galleries every once a while. -and then modified for confinience. +**furaffinity-dl** is a python script for batch downloading of galleries (and scraps/favorites) from furaffinity users or your submission notifications! +Mainly it was written for preservation of culture, to counter the people nuking their galleries every once in a while. +But no-one is restricting you from just using it for convenience. Supports all known submission types: images, text, flash and audio. ## Requirements -`python 3` +`python3` (Recommended version is 3.10.x and above) `pip3 install -r requirements.txt` -**The script currently only works with the "Modern" theme** +furaffinity-dl has been tested on Linux and Windows OSs, however it should also work on Mac or any other platform that supports python. -furaffinity-dl has only been tested only on Linux, however it should also work on Mac, Windows or any other platform that supports python. 
+***The script currently only works with the "Modern" theme*** ## Usage @@ -24,46 +22,47 @@ When downloading a folder make sure to put everything after **/folder/**, for ex ```help -usage: furaffinity-dl.py [-h] [-c COOKIES] [--output OUTPUT_FOLDER] [--check] [-ua USER_AGENT] [-sub] [-f FOLDER] [-s START [START ...]] - [-S STOP] [-rd] [-i INTERVAL] [-r] [--filter] [-m] [--download DOWNLOAD] [-jd] [--login] +usage: furaffinity-dl.py [-h] [--cookies COOKIES] [--output OUTPUT_FOLDER] [--check] [--user-agent USER_AGENT] [--submissions] [--folder FOLDER] [--start START] + [--stop STOP] [--redownload] [--interval INTERVAL] [--rating] [--filter] [--metadata] [--download DOWNLOAD] [--json-description] [--login] + [--index] [username] [category] Downloads the entire gallery/scraps/folder/favorites of a furaffinity user, or your submissions notifications positional arguments: - username username of the furaffinity user + username username of the furaffinity user (if username is starting with '-' or '--' provide them through a file instead) category the category to download, gallery/scraps/favorites [default: gallery] options: -h, --help show this help message and exit - -c COOKIES, --cookies COOKIES + --cookies COOKIES, -c COOKIES path to a NetScape cookies file --output OUTPUT_FOLDER, -o OUTPUT_FOLDER set a custom output folder --check check and download latest submissions of a user - -ua USER_AGENT, --user-agent USER_AGENT - Your browser's useragent, may be required, depending on your luck - -sub, --submissions download your submissions - -f FOLDER, --folder FOLDER + --user-agent USER_AGENT, -ua USER_AGENT + Your browser's user agent, may be required, depending on your luck + --submissions, -sub download your submissions + --folder FOLDER, -f FOLDER full path of the furaffinity gallery folder. for instance 123456/Folder-Name-Here - -s START [START ...], --start START [START ...] - page number to start from - -S STOP, --stop STOP Page number to stop on. 
Specify the full URL after the username: for favorites pages (1234567890/next) or for submissions pages: (new~123456789@48) - -rd, --redownload Redownload files that have been downloaded already - -i INTERVAL, --interval INTERVAL + --start START page number to start from + --stop STOP Page number to stop on. Specify the full URL after the username: for favorites pages (1234567890/next) or for submissions pages: (new~123456789@48) + --redownload, -rd Redownload files that have been downloaded already + --interval INTERVAL, -i INTERVAL delay between downloading pages in seconds [default: 0] - -r, --rating disable rating separation + --rating, -r disable rating separation --filter enable submission filter - -m, --metadata enable metadata saving - --download DOWNLOAD download a specific submission /view/12345678/ - -jd, --json-description + --metadata, -m enable metadata saving + --download DOWNLOAD download a specific submission by providing its id + --json-description, -jd download description as a JSON list --login extract furaffinity cookies directly from your browser + --index create an index of downloaded files in an output folder Examples: python3 furaffinity-dl.py koul -> will download gallery of user koul python3 furaffinity-dl.py koul scraps -> will download scraps of user koul - python3 furaffinity-dl.py mylafox favorites -> will download favorites of user mylafox + python3 furaffinity-dl.py mylafox favorites -> will download favorites of user mylafox You can also download a several users in one go like this: python3 furaffinity-dl.py "koul radiquum mylafox" -> will download gallery of users koul -> radiquum -> mylafox @@ -71,21 +70,21 @@ You can also provide a file with user names that are separated by a new line You can also log in to FurAffinity in a web browser and load cookies to download age restricted content or submissions: python3 furaffinity-dl.py letodoesart -c cookies.txt -> will download gallery of user letodoesart including Mature and Adult 
submissions - python3 furaffinity-dl.py --submissions -c cookies.txt -> will download your submissions notifications + python3 furaffinity-dl.py --submissions -c cookies.txt -> will download your submissions notifications DISCLAIMER: It is your own responsibility to check whether batch downloading is allowed by FurAffinity terms of service and to abide by them. ``` -You can also log in to download restricted content. To do that, log in to FurAffinity in your web browser, and use `python3 furaffinity-dl.py --login` to export furaffinity cookies from your web browser in Netscape format directly in file `cookies.txt` or export them manually with extensions: [for Firefox](https://addons.mozilla.org/en-US/firefox/addon/ganbo/) and [for Chrome based browsers](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid?hl=en), then you can then pass them to the script with the `-c` flag, like this (you may also have to provide your user agent): +You can also log in to download restricted content. 
To do that, log in to FurAffinity in your web browser, and use `python3 furaffinity-dl.py --login` to export furaffinity cookies from your web browser in Netscape format directly in to the file `cookies.txt` or export them manually with extensions: [for Firefox](https://addons.mozilla.org/en-US/firefox/addon/ganbo/) and [for Chrome based browsers](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid?hl=en), then you can then pass them to the script with the `-c` flag, like this (you may also have to provide your user agent): -`python3 furaffinity-dl.py letodoesart -c cookies.txt --user_agent 'Mozilla/5.0 ....'` +`python3 furaffinity-dl.py letodoesart -c cookies.txt --user-agent 'Mozilla/5.0 ....'` -## TODO + ## Disclaimer diff --git a/furaffinity-dl.py b/furaffinity-dl.py index 294f8fa..4f5cfba 100644 --- a/furaffinity-dl.py +++ b/furaffinity-dl.py @@ -1,16 +1,14 @@ #!/usr/bin/python3 import contextlib -import http.cookiejar as cookielib import os from time import sleep -import requests from bs4 import BeautifulSoup import Modules.config as config from Modules.download import download from Modules.functions import check_filter -from Modules.functions import download_complete +from Modules.functions import DownloadComplete from Modules.functions import login from Modules.functions import next_button from Modules.functions import requests_retry_session @@ -18,20 +16,11 @@ from Modules.functions import system_message_handler from Modules.index import check_file from Modules.index import start_indexing -# get session -session = requests.session() -session.headers.update({"User-Agent": config.user_agent}) - -if config.cookies is not None: # add cookies if present - cookies = cookielib.MozillaCookieJar(config.cookies) - cookies.load() - session.cookies = cookies - def main(): - # download loop + """loop over and download all images on the page(s)""" page_num = config.start - with contextlib.suppress(download_complete): + with 
contextlib.suppress(DownloadComplete): while True: if config.stop == page_num: print( @@ -41,7 +30,7 @@ stopping.{config.END}' break page_url = f"{download_url}/{page_num}" - response = requests_retry_session(session=session).get(page_url) + response = requests_retry_session().get(page_url) s = BeautifulSoup(response.text, "html.parser") # System messages @@ -71,7 +60,7 @@ downloaded - {config.BASE_URL}{img_url}{config.END}' f'{config.SUCCESS_COLOR}Downloaded all recent files of \ "{username}"{config.END}' ) - raise download_complete + raise DownloadComplete print( f'{config.WARN_COLOR}Skipping "{title}" since \ it\'s already downloaded{config.END}' @@ -96,15 +85,12 @@ if __name__ == "__main__": print(f"{config.SUCCESS_COLOR}indexing finished{config.END}") exit() - try: - response = requests_retry_session(session=session).get(config.BASE_URL) - except KeyboardInterrupt: - print(f"{config.WARN_COLOR}Aborted by user{config.END}") - exit() - - s = BeautifulSoup(response.text, "html.parser") - if s.find(class_="loggedin_user_avatar") is not None: - account_username = s.find(class_="loggedin_user_avatar").attrs.get("alt") + one_time_response = requests_retry_session().get(config.BASE_URL) + one_time_s = BeautifulSoup(one_time_response.text, "html.parser") + if one_time_s.find(class_="loggedin_user_avatar") is not None: + account_username = one_time_s.find(class_="loggedin_user_avatar").attrs.get( + "alt" + ) print( f'{config.SUCCESS_COLOR}Logged in as \ "{account_username}"{config.END}' @@ -146,17 +132,6 @@ downloading "{config.folder[1]}"{config.END}' ) exit() - try: - if os.path.exists(config.username[0]): - data = open(config.username[0]).read() - config.username = filter(None, data.split("\n")) - except TypeError or AttributeError: - print( - f"{config.ERROR_COLOR}Please enter a username \ -or provide a file with usernames (1 username per line){config.END}" - ) - exit() - for username in config.username: username = username.split("#")[0].translate( 
str.maketrans(config.username_replace_chars) diff --git a/requirements.txt b/requirements.txt index 9cd4afa..2590c89 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ -beautifulsoup4 +urllib3 requests +beautifulsoup4 tqdm -browser-cookie3 pathvalidate pre-commit +browser-cookie3