diff --git a/.gitignore b/.gitignore
index 19621b0..46f09c0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,9 +8,10 @@ cookies.txt
 *.gif
 *.swf
 
-# Download folder
-furaffinity-dl/
+# Default download folder
+Submissions/
 
 # vscode stuff
 .vscode
-ignore
+list.txt
+dl_dev.py
diff --git a/README.md b/README.md
index ebc392c..c6509e7 100644
--- a/README.md
+++ b/README.md
@@ -24,9 +24,9 @@ When downloading a folder make sure to put everything after **/folder/**, for ex
 
 ```help
-usage: furaffinity-dl.py [-h] [-sub] [-f FOLDER [FOLDER ...]] [-c COOKIES [COOKIES ...]] [-ua USER_AGENT [USER_AGENT ...]] [--start START [START ...]]
-                         [--stop STOP [STOP ...]] [--redownload] [--interval INTERVAL [INTERVAL ...]] [--rating] [--filter] [--metadata] [--download DOWNLOAD]
-                         [-jd] [--login]
+usage: furaffinity-dl.py [-h] [-sub] [-f FOLDER [FOLDER ...]] [-c COOKIES [COOKIES ...]] [-ua USER_AGENT [USER_AGENT ...]]
+                         [--start START [START ...]] [--stop STOP [STOP ...]] [--redownload] [--interval INTERVAL [INTERVAL ...]] [--rating]
+                         [--filter] [--metadata] [--download DOWNLOAD] [-jd] [--login] [--check] [--output]
                          [username] [category]
 
 Downloads the entire gallery/scraps/folder/favorites of a furaffinity user, or your submissions notifications
@@ -52,18 +52,24 @@ options:
   --interval INTERVAL [INTERVAL ...], -i INTERVAL [INTERVAL ...]
                         delay between downloading pages in seconds [default: 0]
   --rating, -r          disable rating separation
-  --filter              disable submission filter
+  --filter              enable submission filter
   --metadata, -m        enable metadata saving
   --download DOWNLOAD   download a specific submission /view/12345678/
   -jd, --json-description
                         download description as a JSON list
   --login               extract furaffinity cookies directly from your browser
+  --check               check and download latest submissions of a user
+  --output, -o          set a custom output folder
 
 Examples:
  python3 furaffinity-dl.py koul -> will download gallery of user koul
 python3 furaffinity-dl.py koul scraps -> will download scraps of user koul
 python3 furaffinity-dl.py mylafox favorites -> will download favorites of user mylafox
 
+You can also download several users in one go like this:
+ python3 furaffinity-dl.py "koul radiquum mylafox" -> will download gallery of users koul -> radiquum -> mylafox
+You can also provide a file with user names that are separated by a new line
+
 You can also log in to FurAffinity in a web browser and load cookies to download age restricted content or submissions:
  python3 furaffinity-dl.py letodoesart -c cookies.txt -> will download gallery of user letodoesart including Mature and Adult submissions
  python3 furaffinity-dl.py --submissions -c cookies.txt -> will download your submissions notifications
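Since the README now documents multi-user input, here is a standalone sketch (not part of the patch; `resolve_users` is a name invented for illustration) of how the patched script interprets the username argument, mirroring the split/file-fallback logic added in furaffinity-dl.py below:

```python
import os


def resolve_users(username_arg):
    # Illustration only: mirrors args.username.split(" ") and the
    # filter(None, data.split("\n")) fallback added in this patch.
    users = username_arg.split(" ")
    if os.path.exists(users[0]):  # a file such as list.txt: one username per line
        with open(users[0]) as f:
            users = [line for line in f.read().split("\n") if line]
    return users


print(resolve_users("koul radiquum mylafox"))  # ['koul', 'radiquum', 'mylafox']
```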
diff --git a/furaffinity-dl.py b/furaffinity-dl.py
index 2abe8d0..7e9b991 100644
--- a/furaffinity-dl.py
+++ b/furaffinity-dl.py
@@ -1,5 +1,6 @@
 #!/usr/bin/python3
 import argparse
+import contextlib
 import http.cookiejar as cookielib
 import json
 import os
@@ -8,8 +9,10 @@ from time import sleep
 
 import requests
 from bs4 import BeautifulSoup
+from pathvalidate import sanitize_filename
 from tqdm import tqdm
 
+
 # COLORS
 WHITE = "\033[1;37m"
 RED = "\033[1;91m"
@@ -20,33 +23,54 @@ END = "\033[0m"
 
 # Argument parsing
 parser = argparse.ArgumentParser(
     formatter_class=argparse.RawTextHelpFormatter,
-    description="Downloads the entire gallery/scraps/folder/favorites of a furaffinity user, or your submissions notifications",
+    description="Downloads the entire gallery/scraps/folder/favorites \
+of a furaffinity user, or your submissions notifications",
     epilog="""
 Examples:
  python3 furaffinity-dl.py koul -> will download gallery of user koul
  python3 furaffinity-dl.py koul scraps -> will download scraps of user koul
- python3 furaffinity-dl.py mylafox favorites -> will download favorites of user mylafox \n
-You can also log in to FurAffinity in a web browser and load cookies to download age restricted content or submissions:
- python3 furaffinity-dl.py letodoesart -c cookies.txt -> will download gallery of user letodoesart including Mature and Adult submissions
- python3 furaffinity-dl.py --submissions -c cookies.txt -> will download your submissions notifications \n
-DISCLAIMER: It is your own responsibility to check whether batch downloading is allowed by FurAffinity terms of service and to abide by them.
+ python3 furaffinity-dl.py mylafox favorites -> will download favorites \
+of user mylafox \n
+You can also download several users in one go like this:
+ python3 furaffinity-dl.py "koul radiquum mylafox" \
+-> will download gallery of users koul -> radiquum -> mylafox
+You can also provide a file with user names that are separated by a new line\n
+You can also log in to FurAffinity in a web browser and load cookies to \
+download age restricted content or submissions:
+ python3 furaffinity-dl.py letodoesart -c cookies.txt -> will download \
+gallery of user letodoesart including Mature and Adult submissions
+ python3 furaffinity-dl.py --submissions -c cookies.txt -> will download your \
+submissions notifications \n
+DISCLAIMER: It is your own responsibility to check whether batch downloading \
+is allowed by FurAffinity terms of service and to abide by them.
 """,
 )
-parser.add_argument("username", nargs="?", help="username of the furaffinity user")
+parser.add_argument(
+    "username",
+    nargs="?",
+    help="username of the furaffinity \
+user",
+)
 parser.add_argument(
     "category",
     nargs="?",
-    help="the category to download, gallery/scraps/favorites [default: gallery]",
+    help="the category to download, gallery/scraps/favorites \
+[default: gallery]",
     default="gallery",
 )
 parser.add_argument(
-    "-sub", "--submissions", action="store_true", help="download your submissions"
+    "-sub",
+    "--submissions",
+    action="store_true",
+    help="download your \
+submissions",
 )
 parser.add_argument(
     "-f",
     "--folder",
     nargs="+",
-    help="full path of the furaffinity gallery folder. for instance 123456/Folder-Name-Here",
+    help="full path of the furaffinity gallery folder. for instance 123456/\
+Folder-Name-Here",
 )
 parser.add_argument(
     "-c", "--cookies", nargs="+", help="path to a NetScape cookies file"
 )
@@ -57,7 +81,8 @@ parser.add_argument(
     dest="user_agent",
     nargs="+",
     default=[
-        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 Firefox/101.0"
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 \
+Firefox/101.0"
     ],
     help="Your browser's useragent, may be required, depending on your luck",
 )
@@ -69,7 +94,9 @@ parser.add_argument(
     "-S",
     default=[0],
     nargs="+",
-    help="Page number to stop on. Specify the full URL after the username: for favorites pages (1234567890/next) or for submissions pages: (new~123456789@48)",
+    help="Page number to stop on. Specify the full URL after the username: for \
+favorites pages (1234567890/next) or for submissions pages: \
+(new~123456789@48)",
 )
 parser.add_argument(
     "--redownload",
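The `--start`/`--stop` help above is dense; as a reviewer aid, these are the page-token shapes per category, inferred from the help string and the pagination comments in `main()` further down. The concrete numbers are placeholders:

```python
# Illustration only: what --start/--stop tokens look like for each category.
PAGE_TOKEN_EXAMPLES = {
    "gallery": "2",                      # plain sequential page numbers
    "scraps": "2",                       # same scheme as gallery
    "favorites": "1234567890/next",      # opaque id plus the literal "/next"
    "submissions": "new~123456789@48",   # sort key ~ id @ page size (12/48/72)
}
```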
@@ -94,8 +121,8 @@ parser.add_argument(
 )
 parser.add_argument(
     "--filter",
-    action="store_false",
-    help="disable submission filter",
+    action="store_true",
+    help="enable submission filter",
 )
 parser.add_argument(
     "--metadata",
@@ -119,30 +146,25 @@ parser.add_argument(
     action="store_true",
     help="extract furaffinity cookies directly from your browser",
 )
+parser.add_argument(
+    "--check",
+    action="store_true",
+    help="check and download latest submissions of a user",
+)
+parser.add_argument(
+    "--output",
+    "-o",
+    dest="output_folder",
+    default="Submissions",
+    help="set a custom output folder",
+)
 args = parser.parse_args()
 
 BASE_URL = "https://www.furaffinity.net"
-username = args.username
-
-if (
-    args.submissions is False and args.login is False and args.download is None
-):  # check if you are not downloading submissions or a specific post
-    categories = {
-        "gallery": "gallery",
-        "scraps": "scraps",
-        "favorites": "favorites",
-    }
-    category = categories.get(args.category)
-    if args.username is None:
-        print(f"{RED} please enter a FA Username{END}")
-        exit()
-    if category is None:
-        print(f"{RED} please enter a valid category gallery/scraps/favorites{END}")
-        exit()
-
-    download_url = f"{BASE_URL}/{category}/{username}"
-    output = f"furaffinity-dl/{category}/{username}"
+if args.username is not None:
+    username = args.username.split(" ")
+category = args.category
 
 # get session
 session = requests.session()
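For context (unchanged by this patch, shown because the hunk elides the jar construction): loading the Netscape-format cookies.txt into the requests session uses only the stdlib `http.cookiejar`. A minimal sketch, assuming cookies.txt exists in the working directory:

```python
import http.cookiejar as cookielib

import requests

session = requests.session()
cookies = cookielib.MozillaCookieJar("cookies.txt")  # Netscape cookie file
cookies.load()
session.cookies = cookies  # every session.get() now sends the FA login cookies
```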
\n") - ) - print(f"{YELLOW} System Message: {message}{END}") - exit() - - image = s.find(class_="download").find("a").attrs.get("href") - title = f' {s.find(class_="submission-title").find("p").contents[0]} ' - description = ( - s.find(class_="submission-description").text.strip().replace("\r\n", "\n") - ) - - if args.json_description is True: - description = [] - filename = image.split("/")[-1:][0] - data = { - "id": int(path.split("/")[-2:-1][0]), - "filename": filename, - "author": s.find(class_="submission-id-sub-container") - .find("a") - .find("strong") - .text, - "date": s.find(class_="popup_date").attrs.get("title"), - "title": title, - "description": description, - "url": f"{BASE_URL}{path}", - "tags": [], - "category": s.find(class_="info").find(class_="category-name").text, - "type": s.find(class_="info").find(class_="type-name").text, - "species": s.find(class_="info").findAll("div")[2].find("span").text, - "gender": s.find(class_="info").findAll("div")[3].find("span").text, - "views": int(s.find(class_="views").find(class_="font-large").text), - "favorites": int(s.find(class_="favorites").find(class_="font-large").text), - "rating": s.find(class_="rating-box").text.strip(), - "comments": [], - } - - if args.submissions is True or args.download is not None: - global output - output = f"furaffinity-dl/gallery/{data.get('author')}" - - if args.filter is True: - match = re.search( - 'YCH[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*YCH|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*ABLE|AVAIL[a-z $-/:-?{-~!"^_`\\[\\]]*YCH|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLONE|CLONE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*LIM|LIM[a-z $-/:-?{-~!"^_`\\[\\]]*YCH|COM[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*COM|COM[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*COM|FIX[a-z $-/:-?{-~!"^_`\\[\\]]*ICE|REM[insder]*\\W|\\bREF|\\bSale\\W|auction|multislot|stream|adopt', - title, - re.IGNORECASE, +def system_message_handler(s): + try: + message = { + s.find(class_="notice-message") + .find("div").find(class_="link-override").text.strip() + } + except AttributeError: + message = ( + s.find("section", class_="aligncenter notice-message") + .find("div", class_="section-body alignleft") + .find("div", class_="redirect-message") + .text.strip() ) - if match is not None and title == match.string: - print( - f"{YELLOW} post:{title}was filtered and will not be downloaded - {data.get('url')}{END}" - ) - return True + print(f"{YELLOW}System Message: {message}{END}") + raise System_Message - image_url = f"https:{image}" - os.makedirs(output, exist_ok=True) - global output_path - output_path = f"{output}/{filename}" +def check_filter(title, url): + search = 'YCH[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\ +|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\ +|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE\ +|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\ +|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*ABLE\ +|AVAIL[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\ +|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLONE\ +|CLONE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\ +|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*LIM\ +|LIM[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\ +|COM[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\ +|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*COM\ +|COM[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE[^r]\ +|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*COM\ +|FIX[a-z $-/:-?{-~!"^_`\\[\\]]*ICE\ +|TELEGRAM[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\ +|TG[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\ +|REM[insder]*\\b\ +|\\bREF|\\bSale|auction|multislot|stream|adopt' + + match = re.search( + 
 
-    image_url = f"https:{image}"
-    os.makedirs(output, exist_ok=True)
-    global output_path
-    output_path = f"{output}/{filename}"
+
+
+def check_filter(title, url):
+    search = 'YCH[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
+|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
+|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE\
+|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
+|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*ABLE\
+|AVAIL[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
+|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLONE\
+|CLONE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
+|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*LIM\
+|LIM[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
+|COM[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
+|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*COM\
+|COM[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE[^r]\
+|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*COM\
+|FIX[a-z $-/:-?{-~!"^_`\\[\\]]*ICE\
+|TELEGRAM[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
+|TG[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
+|REM[insder]*\\b\
+|\\bREF|\\bSale|auction|multislot|stream|adopt'
+
+    match = re.search(
+        search,
+        title,
+        re.IGNORECASE,
+    )
+    if match is not None and title == match.string:
+        print(
+            f'{YELLOW}"{title}" was filtered and will not be \
+downloaded - {url}{END}'
+        )
+        return True
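A quick usage check for the rewritten filter. This assumes `check_filter` from the patch is in scope; the titles are made up. It returns `True` (and prints the yellow notice) for ad-style titles, and `None` for everything else:

```python
# "YCH ... OPEN" matches the first alternative of the pattern above.
print(check_filter("Summer YCH - OPEN", "https://example.test/view/1/"))    # True
# No advertising keywords, so the submission is kept.
print(check_filter("Evening walk sketch", "https://example.test/view/2/"))  # None
```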
 
 
+def create_metadata(output, data, s, title, filename):
+    os.makedirs(f'{output}/metadata', exist_ok=True)
+    metadata = f"{output}/metadata/{title} - {filename}"
     if args.rating is True:
-        os.makedirs(f'{output}/{data.get("rating")}', exist_ok=True)
-        output_path = f'{output}/{data.get("rating")}/{filename}'
+        os.makedirs(f'{output}/{data.get("rating")}/metadata', exist_ok=True)
+        metadata = f'{output}/{data.get("rating")}/metadata/{title} - {filename}'
 
-    if args.dont_redownload is True and os.path.isfile(output_path):
-        print(f"{YELLOW} Skipping:{title} since it's already downloaded{END}")
-    else:
-        download_file(image_url, output_path, title)
+    # Extract description as list
+    if args.json_description is True:
+        for desc in s.find("div", class_="submission-description").stripped_strings:
+            data["description"].append(desc)
 
-    if args.metadata is True:
-        metadata = output_path
-
-        # Extract description as list
-        if args.json_description is True:
-            for desc in s.find("div", class_="submission-description").stripped_strings:
-
-                if re.search("<", desc) is True:
-                    desc = desc.replace("<", "")
-
-                if re.search(">", desc) is True:
-                    desc = desc.replace(">", "")
-
-                if re.search("/", desc) is True:
-                    desc = desc.replace("/", "")
-
-                data["description"].append(desc)
-
-        # Extact tags
-
-        try:
-            for tag in s.find(class_="tags-row").findAll(class_="tags"):
-                data["tags"].append(tag.find("a").text)
-        except AttributeError:
-            print(f"{YELLOW} post:{title} has no tags{END}")
+    # Extract tags
+    try:
+        for tag in s.find(class_="tags-row").findAll(class_="tags"):
+            data["tags"].append(tag.find("a").text)
+    except AttributeError:
+        print(f'{YELLOW}"{title}" has no tags{END}')
 
-        # Extract comments
-        for comment in s.findAll(class_="comment_container"):
-            temp_ele = comment.find(class_="comment-parent")
-            parent_cid = (
-                None if temp_ele is None else int(temp_ele.attrs.get("href")[5:])
-            )
-            # Comment is deleted or hidden
-            if comment.find(class_="comment-link") is None:
-                continue
-
-            data["comments"].append(
-                {
-                    "cid": int(
-                        comment.find(class_="comment-link").attrs.get("href")[5:]
-                    ),
-                    "parent_cid": parent_cid,
-                    "content": comment.find(class_="comment_text").contents[0].strip(),
-                    "username": comment.find(class_="comment_username").text,
-                    "date": comment.find(class_="popup_date").attrs.get("title"),
-                }
-            )
-
-        # Write a UTF-8 encoded JSON file for metadata
-        with open(f"{metadata}.json", "w", encoding="utf-8") as f:
-            json.dump(data, f, ensure_ascii=False, indent=4)
-
-    return True
+    # Extract comments
+    for comment in s.findAll(class_="comment_container"):
+        temp_ele = comment.find(class_="comment-parent")
+        parent_cid = None if temp_ele is None else int(temp_ele.attrs.get("href")[5:])
+        # Comment is deleted or hidden
+        if comment.find(class_="comment-link") is None:
+            continue
+
+        data["comments"].append(
+            {
+                "cid": int(comment.find(class_="comment-link").attrs.get("href")[5:]),
+                "parent_cid": parent_cid,
+                "content": comment.find(class_="comment_text").contents[0].strip(),
+                "username": comment.find(class_="comment_username").text,
+                "date": comment.find(class_="popup_date").attrs.get("title"),
+            }
+        )
+
+    # Write a UTF-8 encoded JSON file for metadata
+    with open(f"{metadata}.json", "w", encoding="utf-8") as f:
+        json.dump(data, f, ensure_ascii=False, indent=4)
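With `--metadata`, each submission now gets a JSON file in a `metadata/` subfolder, roughly like the following. Every value here is a made-up placeholder; the field set matches the `data` dict built in `download()` further down:

```json
{
    "id": 12345678,
    "filename": "1650000000.artist_example.png",
    "author": "artist",
    "date": "Apr 15, 2022 01:23 PM",
    "title": "Example Title",
    "description": "Example description",
    "url": "https://www.furaffinity.net/view/12345678/",
    "tags": ["example", "tag"],
    "category": "Artwork (Digital)",
    "type": "General Furry Art",
    "species": "Fox",
    "gender": "Male",
    "views": 100,
    "favorites": 10,
    "rating": "General",
    "comments": []
}
```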
 
-
-if args.download is not None:
-    download(args.download)
-    print(f"{GREEN} File saved as {output_path} {END}")
-    exit()
-
-# Main function
-
-
-def main():
-    # check if you are logged in
-    page_end = args.stop[0]
-    page_num = args.start[0]
-    page_url = f"{download_url}/{page_num}"
-    response = session.get(page_url)
-    s = BeautifulSoup(response.text, "html.parser")
-    if s.find(class_="loggedin_user_avatar") is not None:
-        account_username = s.find(class_="loggedin_user_avatar").attrs.get("alt")
-        print(f"{GREEN} Logged in as: {account_username}{END}")
-    else:
-        print(f"{YELLOW} Not logged in, NSFW content is inaccessible{END}")
-
-    # download loop
-    while True:
-        if page_end == page_num:
-            print(f"{YELLOW} Reached page {page_end}, stopping.{END}")
-            break
-
-        page_url = f"{download_url}/{page_num}"
-        response = session.get(page_url)
-        s = BeautifulSoup(response.text, "html.parser")
-
-        # System messages
-        if s.find(class_="notice-message") is not None:
-            try:
-                message = (
-                    s.find(class_="notice-message")
-                    .find("div")
-                    .find(class_="link-override")
-                    .text.strip()
-                    .replace(".", ". \n")
-                )
-            except AttributeError:
-                message = (
-                    s.find(class_="notice-message")
-                    .find("div", class_="section-body alignleft")
-                    .find("div", class_="redirect-message")
-                    .text.strip()
-                    .replace(".", ". \n")
-                )
-            print(f"{YELLOW} System Message: {message}{END}")
-            exit()
-
-        # End of gallery
-        if s.find(id="no-images") is not None:
-            print(f"{GREEN} End of gallery{END}")
-            break
-
-        # Download all images on the page
-        for img in s.findAll("figure"):
-            download(img.find("a").attrs.get("href"))
-            sleep(args.interval[0])
-
-        # Download submissions
-        if args.submissions is True:
-            try:
-                next_button = s.find("a", class_="button standard more").attrs.get(
-                    "href"
-                )
-            except AttributeError:
-                try:
-                    next_button = s.find(
-                        "a", class_="button standard more-half"
-                    ).attrs.get("href")
-                except AttributeError:
-                    print(f"{YELLOW} Unable to find next button{END}")
-                    break
-
-            # unlike galleries that are sequentially numbered, submissions use a different scheme.
-            # the "page_num" is instead: new~[set of numbers]@(12 or 48 or 72) if sorting by new
-
-            page_num = next_button.split("/")[-2]
-            page_url = f"{BASE_URL}{next_button}"
-
-            print(f"{WHITE} Downloading page {page_num} - {page_url} {END}")
-        # Download everything else
-        elif args.category != "favorites":
-            next_button = s.find("button", class_="button standard", text="Next")
-            if next_button is None or next_button.parent is None:
-                print(f"{YELLOW} Unable to find next button{END}")
-                break
-
-            page_num = next_button.parent.attrs["action"].split("/")[-2]
-
-            print(f"{WHITE} Downloading page {page_num} - {page_url} {END}")
-        # Download favorites
-        else:
-            next_button = s.find("a", class_="button standard right", text="Next")
-            if next_button is None:
-                print(f"{YELLOW} Unable to find next button{END}")
-                break
-
-            # unlike galleries that are sequentially numbered, favorites use a different scheme.
-            # the "page_num" is instead: [set of numbers]/next (the trailing /next is required)
-
-            next_page_link = next_button.attrs["href"]
-            next_fav_num = re.search(r"\d+", next_page_link)
-
-            if next_fav_num is None:
-                print(f"{YELLOW} Failed to parse next favorite link{END}")
-                break
-
-            page_num = next_fav_num.group(0) + "/next"
-
-            # parse it into numbers/next
-
-            print(f"{WHITE} Downloading page {page_num} - {page_url} {END}")
-
-    print(f"{GREEN}Finished downloading{END}")
 
 
 def login():
@@ -470,7 +316,7 @@
     try:
         s.find(class_="loggedin_user_avatar")
         account_username = s.find(class_="loggedin_user_avatar").attrs.get("alt")
-        print(f"{GREEN} Logged in as: {account_username}{END}")
+        print(f"{GREEN}Logged in as: {account_username}{END}")
         with open("cookies.txt", "w") as file:
             file.write(
                 f"""# Netscape HTTP Cookie File
.furaffinity.net	TRUE	/	TRUE	{cookie_a.expires}	a	{cookie_a.value}
.furaffinity.net	TRUE	/	TRUE	{cookie_b.expires}	b	{cookie_b.value}"""
             )
         print(
-            f'{GREEN} cookies saved successfully, now you can provide them by using "-c cookies.txt"{END}'
+            f'{GREEN}cookies saved successfully, now you can provide them \
+by using "-c cookies.txt"{END}'
         )
     except AttributeError:
         print(
-            f"{RED} Error getting cookies, either you need to login into furaffinity in your browser, or you can export cookies.txt manually{END}"
+            f"{RED}Error getting cookies, either you need to login into \
+furaffinity in your browser, or you can export cookies.txt manually{END}"
         )
         exit()
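For reference, the file `login()` writes above is the standard Netscape cookie format; an illustrative result with dummy expiry timestamps and cookie values:

```
# Netscape HTTP Cookie File
.furaffinity.net	TRUE	/	TRUE	1700000000	a	xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
.furaffinity.net	TRUE	/	TRUE	1700000000	b	xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
```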
 
 
+# File downloading
+
+
+class CheckComplete(Exception):
+    pass
+
+
+class SystemMessage(Exception):
+    pass
+
+
+def download(path):
+    response = session.get(f"{BASE_URL}{path}")
+    s = BeautifulSoup(response.text, "html.parser")
+
+    # System messages
+    if s.find(class_="notice-message") is not None:
+        system_message_handler(s)
+
+    image = s.find(class_="download").find("a").attrs.get("href")
+    title = s.find(class_="submission-title").find("p").contents[0]
+    title = sanitize_filename(title)
+    dsc = s.find(class_="submission-description").text.strip().replace("\r\n", "\n")
+
+    if args.json_description is True:
+        dsc = []
+    filename = image.split("/")[-1:][0]
+    data = {
+        "id": int(path.split("/")[-2:-1][0]),
+        "filename": filename,
+        "author": s.find(class_="submission-id-sub-container")
+        .find("a")
+        .find("strong")
+        .text,
+        "date": s.find(class_="popup_date").attrs.get("title"),
+        "title": title,
+        "description": dsc,
+        "url": f"{BASE_URL}{path}",
+        "tags": [],
+        "category": s.find(class_="info").find(class_="category-name").text,
+        "type": s.find(class_="info").find(class_="type-name").text,
+        "species": s.find(class_="info").findAll("div")[2].find("span").text,
+        "gender": s.find(class_="info").findAll("div")[3].find("span").text,
+        "views": int(s.find(class_="views").find(class_="font-large").text),
+        "favorites": int(s.find(class_="favorites").find(class_="font-large").text),
+        "rating": s.find(class_="rating-box").text.strip(),
+        "comments": [],
+    }
+    if args.filter is True and check_filter(title, data.get("url")) is True:
+        return True
+
+    image_url = f"https:{image}"
+    output = f"{args.output_folder}/{data.get('author')}"
+    if category != "gallery":
+        output = f"{args.output_folder}/{data.get('author')}/{category}"
+    if args.folder is not None:
+        output = f"{args.output_folder}/{data.get('author')}/{folder[1]}"
+    os.makedirs(output, exist_ok=True)
+    filename = sanitize_filename(filename)
+    global output_path
+    output_path = f"{output}/{title} - {filename}"
+    if args.rating is True:
+        os.makedirs(f'{output}/{data.get("rating")}', exist_ok=True)
+        output_path = f'{output}/{data.get("rating")}/{title} - {filename}'
+
+    if args.dont_redownload is True and os.path.isfile(output_path):
+        if args.check is True:
+            print(f"{GREEN}Downloaded all recent files of \"{data.get('author')}\"{END}")
+            raise CheckComplete
+        print(f'{YELLOW}Skipping "{title}" since it\'s already downloaded{END}')
+        return True
+    else:
+        download_file(
+            image_url,
+            output_path,
+            f'{title} - [{data.get("rating")}]',
+        )
+
+    if args.metadata is True:
+        create_metadata(output, data, s, title, filename)
+    return True
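On the new pathvalidate dependency: `sanitize_filename()` strips characters that are illegal in file names, which is why both `title` and `filename` pass through it above. A minimal check (exact output may vary slightly by version and platform):

```python
from pathvalidate import sanitize_filename

# Reserved characters such as : * / " ? > | < are removed.
print(sanitize_filename('fi:l*e/p"a?t>h|.t<xt'))  # -> "filepath.txt"
```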
 
 
+# Main function
+
+
+def main():
+    page_end = args.stop[0]
+    page_num = args.start[0]
+
+    # download loop
+    with contextlib.suppress(CheckComplete, SystemMessage):
+        while True:
+            if page_end == page_num:
+                print(f"{YELLOW}Reached page {page_end}, stopping.{END}")
+                break
+
+            page_url = f"{download_url}/{page_num}"
+            response = session.get(page_url)
+            s = BeautifulSoup(response.text, "html.parser")
+
+            # System messages
+            if s.find(class_="notice-message") is not None:
+                system_message_handler(s)
+
+            # End of gallery
+            if s.find(id="no-images") is not None:
+                print(f"{GREEN}End of gallery{END}")
+                break
+
+            # Download all images on the page
+            for img in s.findAll("figure"):
+                download(img.find("a").attrs.get("href"))
+                sleep(args.interval[0])
+
+            # Download submissions
+            if args.submissions is True:
+                try:
+                    next_button = s.find("a", class_="button standard more").attrs.get(
+                        "href"
+                    )
+                except AttributeError:
+                    try:
+                        next_button = s.find(
+                            "a", class_="button standard more-half"
+                        ).attrs.get("href")
+                    except AttributeError:
+                        print(f"{YELLOW}Unable to find next button{END}")
+                        break
+
+                # unlike galleries that are sequentially numbered, submissions use a different scheme.
+                # the "page_num" is instead: new~[set of numbers]@(12 or 48 or 72) if sorting by new
+
+                page_num = next_button.split("/")[-2]
+                page_url = f"{BASE_URL}{next_button}"
+
+            elif args.category != "favorites":
+                next_button = s.find("button", class_="button standard", text="Next")
+                if next_button is None or next_button.parent is None:
+                    print(f"{YELLOW}Unable to find next button{END}")
+                    break
+
+                page_num = next_button.parent.attrs["action"].split("/")[-2]
+            else:
+                next_button = s.find("a", class_="button standard right", text="Next")
+                if next_button is None:
+                    print(f"{YELLOW}Unable to find next button{END}")
+                    break
+
+                # unlike galleries that are sequentially numbered, favorites use a different scheme.
+                # the "page_num" is instead: [set of numbers]/next (the trailing /next is required)
+
+                next_page_link = next_button.attrs["href"]
+                next_fav_num = re.search(r"\d+", next_page_link)
+
+                if next_fav_num is None:
+                    print(f"{YELLOW}Failed to parse next favorite link{END}")
+                    break
+
+                page_num = f"{next_fav_num[0]}/next"
+
+            print(f"{WHITE}Downloading page {page_num} - {page_url} {END}")
+
+    print(f"{GREEN}Finished downloading{END}")
 
 
 if __name__ == "__main__":
     if args.login is True:
         login()
 
-    main()
+    response = session.get(BASE_URL)
+    s = BeautifulSoup(response.text, "html.parser")
+    if s.find(class_="loggedin_user_avatar") is not None:
+        account_username = s.find(class_="loggedin_user_avatar").attrs.get("alt")
+        print(f'{GREEN}Logged in as "{account_username}"{END}')
+    else:
+        print(f"{YELLOW}Not logged in, NSFW content is inaccessible{END}")
+
+    if args.download is not None:
+        download(args.download)
+        print(f'{GREEN}File saved as "{output_path}" {END}')
+        exit()
+
+    if args.submissions is True:
+        download_url = f"{BASE_URL}/msg/submissions"
+        main()
+        exit()
+
+    if args.username is None:
+        print(f"{RED}please enter a FA username{END}")
+        exit()
+
+    if args.folder is not None:
+        folder = args.folder[0].split("/")
+        download_url = f"{BASE_URL}/gallery/{username[0]}/folder/{args.folder[0]}"
+        main()
+        exit()
+
+    if os.path.exists(username[0]):
+        data = open(username[0]).read()
+        username = filter(None, data.split("\n"))
+
+    for user in username:
+        print(f'{GREEN}Now downloading "{user}"{END}')
+        download_url = f"{BASE_URL}/{category}/{user}"
+        main()
diff --git a/requirements.txt b/requirements.txt
index db5bd8b..aed3318 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,4 @@ beautifulsoup4
 requests
 tqdm
 browser-cookie3
+pathvalidate