From 54478d3ed3469aaaea8a0f4beb527d63cc855c90 Mon Sep 17 00:00:00 2001 From: Radiquum Date: Wed, 6 Jul 2022 16:54:34 +0500 Subject: [PATCH] divided into multiple files --- .gitignore | 2 +- Modules/config.py | 170 +++++++++++++ Modules/functions.py | 312 ++++++++++++++++++++++++ README.md | 37 ++- furaffinity-dl.py | 559 ++++++------------------------------------- requirements.txt | 1 + 6 files changed, 572 insertions(+), 509 deletions(-) create mode 100644 Modules/config.py create mode 100644 Modules/functions.py diff --git a/.gitignore b/.gitignore index 46f09c0..b6321ad 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,4 @@ Submissions/ # vscode stuff .vscode list.txt -dl_dev.py +__pycache__ diff --git a/Modules/config.py b/Modules/config.py new file mode 100644 index 0000000..45d5b70 --- /dev/null +++ b/Modules/config.py @@ -0,0 +1,170 @@ +import argparse + +parser = argparse.ArgumentParser( + formatter_class=argparse.RawTextHelpFormatter, + description="Downloads the entire gallery/scraps/folder/favorites \ +of a furaffinity user, or your submissions notifications", + epilog=""" +Examples: + python3 furaffinity-dl.py koul -> will download gallery of user koul + python3 furaffinity-dl.py koul scraps -> will download scraps of user koul + python3 furaffinity-dl.py mylafox favorites -> will download favorites \ +of user mylafox \n +You can also download a several users in one go like this: + python3 furaffinity-dl.py "koul radiquum mylafox" \ +-> will download gallery of users koul -> radiquum -> mylafox +You can also provide a file with user names that are separated by a new line\n +You can also log in to FurAffinity in a web browser and load cookies to \ +download age restricted content or submissions: + python3 furaffinity-dl.py letodoesart -c cookies.txt -> will download \ +gallery of user letodoesart including Mature and Adult submissions + python3 furaffinity-dl.py --submissions -c cookies.txt -> will download your \ +submissions notifications \n +DISCLAIMER: It is your own responsibility to check whether batch downloading \ +is allowed by FurAffinity terms of service and to abide by them. +""", +) +parser.add_argument( + "username", + nargs="?", + help="username of the furaffinity \ +user", +) +parser.add_argument( + "category", + nargs="?", + help="the category to download, gallery/scraps/favorites \ +[default: gallery]", + default="gallery", +) +parser.add_argument("-c", "--cookies", help="path to a NetScape cookies file") +parser.add_argument( + "--output", + "-o", + dest="output_folder", + default="Submissions", + help="set a custom output folder", +) +parser.add_argument( + "--check", + action="store_true", + help="check and download latest submissions of a user", +) +parser.add_argument( + "-ua", + "--user-agent", + dest="user_agent", + default="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 \ +Firefox/101.0", + help="Your browser's useragent, may be required, depending on your luck", +) +parser.add_argument( + "-sub", + "--submissions", + action="store_true", + help="download your \ +submissions", +) +parser.add_argument( + "-f", + "--folder", + help="full path of the furaffinity gallery folder. for instance 123456/\ +Folder-Name-Here", +) +parser.add_argument( + "-s", "--start", default=1, help="page number to start from", nargs="+" +) +parser.add_argument( + "-S", + "--stop", + default=0, + help="Page number to stop on. 
Specify the full URL after the username: for \ +favorites pages (1234567890/next) or for submissions pages: \ +(new~123456789@48)", +) +parser.add_argument( + "-rd", + "--redownload", + action="store_false", + help="Redownload files that have been downloaded already", +) +parser.add_argument( + "-i", + "--interval", + type=int, + default=0, + help="delay between downloading pages in seconds [default: 0]", +) +parser.add_argument( + "-r", + "--rating", + action="store_false", + help="disable rating separation", +) +parser.add_argument( + "--filter", + action="store_true", + dest="submission_filter", + help="enable submission filter", +) +parser.add_argument( + "-m", + "--metadata", + action="store_true", + help="enable metadata saving", +) +parser.add_argument( + "--download", + help="download a specific submission /view/12345678/", +) +parser.add_argument( + "-jd", + "--json-description", + dest="json_description", + action="store_true", + help="download description as a JSON list", +) +parser.add_argument( + "--login", + action="store_true", + help="extract furaffinity cookies directly from your browser", +) + +args = parser.parse_args() + +# positional +username = args.username +category = args.category + +if username != None: + username = username.split(" ") + +# Custom input +cookies = args.cookies +output_folder = args.output_folder +download = args.download +interval = args.interval +user_agent = args.user_agent +start = args.start +stop = args.stop +folder = args.folder + +# True\False + +login = args.login +check = args.check +submissions = args.submissions +json_description = args.json_description +metadata = args.metadata +dont_redownload = args.redownload +rating = args.rating +submission_filter = args.submission_filter + +# Colors +SUCCESS_COLOR = "\033[1;92m" +WARN_COLOR = "\033[1;33m" +ERROR_COLOR = "\033[1;91m" +END = "\033[0m" + +# Globals +BASE_URL = "https://www.furaffinity.net" diff --git a/Modules/functions.py b/Modules/functions.py new file mode 100644 index 0000000..4a50413 --- /dev/null +++ b/Modules/functions.py @@ -0,0 +1,312 @@ +import http.cookiejar as cookielib +import json +import os +import re + +import browser_cookie3 +import requests +from bs4 import BeautifulSoup +from pathvalidate import sanitize_filename +from tqdm import tqdm + +import Modules.config as config + +session = requests.session() +if config.cookies is not None: # add cookies if present + cookies = cookielib.MozillaCookieJar(config.cookies) + cookies.load() + session.cookies = cookies + + +class download_complete(Exception): + pass + + +def download(path): + response = session.get(f"{config.BASE_URL}{path}") + s = BeautifulSoup(response.text, "html.parser") + + # System messages + if s.find(class_="notice-message") is not None: + system_message_handler(s) + + image = s.find(class_="download").find("a").attrs.get("href") + title = s.find(class_="submission-title").find("p").contents[0] + title = sanitize_filename(title) + dsc = s.find(class_="submission-description").text.strip().replace("\r\n", "\n") + + if config.json_description is True: + dsc = [] + filename = image.split("/")[-1:][0] + data = { + "id": int(path.split("/")[-2:-1][0]), + "filename": filename, + "author": s.find(class_="submission-id-sub-container") + .find("a") + .find("strong") + .text, + "date": s.find(class_="popup_date").attrs.get("title"), + "title": title, + "description": dsc, + "url": f"{config.BASE_URL}{path}", + "tags": [], + "category": s.find(class_="info").find(class_="category-name").text, + "type": 
s.find(class_="info").find(class_="type-name").text, + "species": s.find(class_="info").findAll("div")[2].find("span").text, + "gender": s.find(class_="info").findAll("div")[3].find("span").text, + "views": int(s.find(class_="views").find(class_="font-large").text), + "favorites": int(s.find(class_="favorites").find(class_="font-large").text), + "rating": s.find(class_="rating-box").text.strip(), + "comments": [], + } + if config.submission_filter is True and check_filter(title) is True: + print( + f'{config.WARN_COLOR}"{title}" was filtered and will not be \ +downloaded - {data.get("url")}{config.END}' + ) + return True + + image_url = f"https:{image}" + output = f"{config.output_folder}/{data.get('author')}" + if config.category != "gallery": + output = f"{config.output_folder}/{data.get('author')}/{config.category}" + if config.folder is not None: + output = f"{config.output_folder}/{data.get('author')}/{config.folder}" + os.makedirs(output, exist_ok=True) + filename = sanitize_filename(filename) + output_path = f"{output}/{title} - {filename}" + if config.rating is True: + os.makedirs(f'{output}/{data.get("rating")}', exist_ok=True) + output_path = f'{output}/{data.get("rating")}/{title} - {filename}' + + if config.dont_redownload is True and os.path.isfile(output_path): + if config.check is True: + print( + f"{config.SUCCESS_COLOR}Downloaded all recent files of \"{data.get('author')}\"{config.END}" + ) + raise download_complete + print( + f'{config.WARN_COLOR}Skipping "{title}" since it\'s already downloaded{config.END}' + ) + return True + else: + download_file( + image_url, + output_path, + f'{title} - \ +[{data.get("rating")}]', + ) + + if config.metadata is True: + create_metadata(output, data, s, title, filename) + if config.download is not None: + print(f'{config.SUCCESS_COLOR}File saved as "{output_path}" {config.END}') + return True + + +def create_metadata(output, data, s, title, filename): + if config.rating is True: + os.makedirs(f'{output}/{data.get("rating")}/metadata', exist_ok=True) + metadata = f'{output}/{data.get("rating")}/metadata/{title} - {filename}' + else: + os.makedirs(f"{output}/metadata", exist_ok=True) + metadata = f"{output}/metadata/{title} - {filename}" + + # Extract description as list + if config.json_description is True: + for desc in s.find("div", class_="submission-description").stripped_strings: + data["description"].append(desc) + + # Extact tags + + try: + for tag in s.find(class_="tags-row").findAll(class_="tags"): + data["tags"].append(tag.find("a").text) + except AttributeError: + print(f'{config.WARN_COLOR}"{title}" has no tags{config.END}') + + # Extract comments + for comment in s.findAll(class_="comment_container"): + temp_ele = comment.find(class_="comment-parent") + parent_cid = None if temp_ele is None else int(temp_ele.attrs.get("href")[5:]) + # Comment is deleted or hidden + if comment.find(class_="comment-link") is None: + continue + + data["comments"].append( + { + "cid": int(comment.find(class_="comment-link").attrs.get("href")[5:]), + "parent_cid": parent_cid, + "content": comment.find(class_="comment_text").contents[0].strip(), + "username": comment.find(class_="comment_username").text, + "date": comment.find(class_="popup_date").attrs.get("title"), + } + ) + + # Write a UTF-8 encoded JSON file for metadata + with open(f"{metadata}.json", "w", encoding="utf-8") as f: + json.dump(data, f, ensure_ascii=False, indent=4) + + +def check_filter(title): + search = 'YCH[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\ +|OPEN[a-z 
$-/:-?{-~!"^_`\\[\\]]*YCH\ +|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE\ +|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\ +|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*ABLE\ +|AVAIL[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\ +|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLONE\ +|CLONE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\ +|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*LIM\ +|LIM[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\ +|COM[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\ +|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*COM\ +|COM[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE[^r]\ +|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*COM\ +|FIX[a-z $-/:-?{-~!"^_`\\[\\]]*ICE\ +|TELEGRAM[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\ +|TG[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\ +|REM[insder]*\\b\ +|\\bREF|\\bSale|auction|multislot|stream|adopt' + + match = re.search( + search, + title, + re.IGNORECASE, + ) + if match is not None and title == match.string: + return True + return None + + +def system_message_handler(s): + try: + message = { + s.find(class_="notice-message") + .find("div") + .find(class_="link-override") + .text.strip() + } + except AttributeError: + message = ( + s.find("section", class_="aligncenter notice-message") + .find("div", class_="section-body alignleft") + .find("div", class_="redirect-message") + .text.strip() + ) + print(f"{config.WARN_COLOR}System Message: {message}{config.END}") + raise download_complete + + +def download_file(url, fname, desc): + try: + r = session.get(url, stream=True) + if r.status_code != 200: + print( + f'{config.ERROR_COLOR}Got a HTTP {r.status_code} while downloading \ +"{fname}". URL {url} ...skipping{config.END}' + ) + return False + + total = int(r.headers.get("Content-Length", 0)) + with open(fname, "wb") as file, tqdm( + desc=desc.ljust(40), + total=total, + miniters=100, + unit="b", + unit_scale=True, + unit_divisor=1024, + ) as bar: + for data in r.iter_content(chunk_size=1024): + size = file.write(data) + bar.update(size) + except KeyboardInterrupt: + print(f"{config.SUCCESS_COLOR}Finished downloading{config.END}") + os.remove(fname) + exit() + + return True + + +def login(user_agent): + + session.headers.update({"User-Agent": user_agent}) + + CJ = browser_cookie3.load() + + response = session.get(config.BASE_URL, cookies=CJ) + FA_COOKIES = CJ._cookies[".furaffinity.net"]["/"] + + cookie_a = FA_COOKIES["a"] + cookie_b = FA_COOKIES["b"] + + s = BeautifulSoup(response.text, "html.parser") + try: + s.find(class_="loggedin_user_avatar") + account_username = s.find(class_="loggedin_user_avatar").attrs.get("alt") + print(f"{config.SUCCESS_COLOR}Logged in as: {account_username}{config.END}") + with open("cookies.txt", "w") as file: + file.write( + f"""# Netscape HTTP Cookie File +# http://curl.haxx.se/rfc/cookie_spec.html +# This is a generated file! Do not edit. +.furaffinity.net TRUE / TRUE {cookie_a.expires} a {cookie_a.value} +.furaffinity.net TRUE / TRUE {cookie_b.expires} b {cookie_b.value}""" + ) + print( + f'{config.SUCCESS_COLOR}cookies saved successfully, now you can provide them \ +by using "-c cookies.txt"{config.END}' + ) + except AttributeError: + print( + f"{config.ERROR_COLOR}Error getting cookies, either you need to login into \ +furaffinity in your browser, or you can export cookies.txt manually{config.END}" + ) + + exit() + + +def next_button(page_url): + response = session.get(page_url) + s = BeautifulSoup(response.text, "html.parser") + if config.submissions is True: + # unlike galleries that are sequentially numbered, submissions use a different scheme. 
+ # the "page_num" is instead: new~[set of numbers]@(12 or 48 or 72) if sorting by new + try: + next_button = s.find("a", class_="button standard more").attrs.get("href") + except AttributeError: + try: + next_button = s.find("a", class_="button standard more-half").attrs.get( + "href" + ) + except AttributeError as e: + print(f"{config.WARN_COLOR}Unable to find next button{config.END}") + raise download_complete from e + page_num = next_button.split("/")[-2] + elif config.category != "favorites": + next_button = s.find("button", class_="button standard", text="Next") + if next_button is None or next_button.parent is None: + print(f"{config.WARN_COLOR}Unable to find next button{config.END}") + raise download_complete + page_num = next_button.parent.attrs["action"].split("/")[-2] + else: + page_num = fav_next_button(s) + print(f"Downloading page {page_num} - {page_url}") + return page_num + + +def fav_next_button(s): + # unlike galleries that are sequentially numbered, favorites use a different scheme. + # the "page_num" is instead: [set of numbers]/next (the trailing /next is required) + next_button = s.find("a", class_="button standard right", text="Next") + if next_button is None: + print(f"{config.WARN_COLOR}Unable to find next button{config.END}") + raise download_complete + next_page_link = next_button.attrs["href"] + next_fav_num = re.search(r"\d+", next_page_link) + + if next_fav_num is None: + print(f"{config.WARN_COLOR}Failed to parse next favorite link{config.END}") + raise download_complete + + return f"{next_fav_num[0]}/next" diff --git a/README.md b/README.md index c6509e7..197b688 100644 --- a/README.md +++ b/README.md @@ -24,9 +24,8 @@ When downloading a folder make sure to put everything after **/folder/**, for ex ```help -usage: furaffinity-dl.py [-h] [-sub] [-f FOLDER [FOLDER ...]] [-c COOKIES [COOKIES ...]] [-ua USER_AGENT [USER_AGENT ...]] - [--start START [START ...]] [--stop STOP [STOP ...]] [--redownload] [--interval INTERVAL [INTERVAL ...]] [--rating] - [--filter] [--metadata] [--download DOWNLOAD] [-jd] [--login] [--check] [--output] +usage: furaffinity-dl.py [-h] [-c COOKIES] [--output OUTPUT_FOLDER] [--check] [-ua USER_AGENT] [-sub] [-f FOLDER] [-s START [START ...]] + [-S STOP] [-rd] [-i INTERVAL] [-r] [--filter] [-m] [--download DOWNLOAD] [-jd] [--login] [username] [category] Downloads the entire gallery/scraps/folder/favorites of a furaffinity user, or your submissions notifications @@ -37,34 +36,34 @@ positional arguments: options: -h, --help show this help message and exit - -sub, --submissions download your submissions - -f FOLDER [FOLDER ...], --folder FOLDER [FOLDER ...] - full path of the furaffinity gallery folder. for instance 123456/Folder-Name-Here - -c COOKIES [COOKIES ...], --cookies COOKIES [COOKIES ...] + -c COOKIES, --cookies COOKIES path to a NetScape cookies file - -ua USER_AGENT [USER_AGENT ...], --user-agent USER_AGENT [USER_AGENT ...] + --output OUTPUT_FOLDER, -o OUTPUT_FOLDER + set a custom output folder + --check check and download latest submissions of a user + -ua USER_AGENT, --user-agent USER_AGENT Your browser's useragent, may be required, depending on your luck - --start START [START ...], -s START [START ...] + -sub, --submissions download your submissions + -f FOLDER, --folder FOLDER + full path of the furaffinity gallery folder. for instance 123456/Folder-Name-Here + -s START [START ...], --start START [START ...] page number to start from - --stop STOP [STOP ...], -S STOP [STOP ...] - Page number to stop on. 
Specify the full URL after the username: for favorites pages (1234567890/next) or for submissions pages: (new~123456789@48) - --redownload, -rd Redownload files that have been downloaded already - --interval INTERVAL [INTERVAL ...], -i INTERVAL [INTERVAL ...] + -S STOP, --stop STOP Page number to stop on. Specify the full URL after the username: for favorites pages (1234567890/next) or for submissions pages: (new~123456789@48) + -rd, --redownload Redownload files that have been downloaded already + -i INTERVAL, --interval INTERVAL delay between downloading pages in seconds [default: 0] - --rating, -r disable rating separation + -r, --rating disable rating separation --filter enable submission filter - --metadata, -m enable metadata saving + -m, --metadata enable metadata saving --download DOWNLOAD download a specific submission /view/12345678/ -jd, --json-description download description as a JSON list --login extract furaffinity cookies directly from your browser - --check check and download latest submissions of a user - --output, -o set a custom output folder Examples: python3 furaffinity-dl.py koul -> will download gallery of user koul python3 furaffinity-dl.py koul scraps -> will download scraps of user koul - python3 furaffinity-dl.py mylafox favorites -> will download favorites of user mylafox + python3 furaffinity-dl.py mylafox favorites -> will download favorites of user mylafox You can also download a several users in one go like this: python3 furaffinity-dl.py "koul radiquum mylafox" -> will download gallery of users koul -> radiquum -> mylafox @@ -72,7 +71,7 @@ You can also provide a file with user names that are separated by a new line You can also log in to FurAffinity in a web browser and load cookies to download age restricted content or submissions: python3 furaffinity-dl.py letodoesart -c cookies.txt -> will download gallery of user letodoesart including Mature and Adult submissions - python3 furaffinity-dl.py --submissions -c cookies.txt -> will download your submissions notifications + python3 furaffinity-dl.py --submissions -c cookies.txt -> will download your submissions notifications DISCLAIMER: It is your own responsibility to check whether batch downloading is allowed by FurAffinity terms of service and to abide by them. 
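Note: the README epilog above mentions that a file of user names, one per line, can be passed in place of the `username` argument, but gives no worked example. Based on the handling in `furaffinity-dl.py` below (the positional argument is checked with `os.path.exists` and each non-empty line is read as a user name), a minimal sketch looks like this — `users.txt` is a purely illustrative file name:

```
# users.txt — one FurAffinity user name per line
koul
radiquum
mylafox
```

```
python3 furaffinity-dl.py users.txt
```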
diff --git a/furaffinity-dl.py b/furaffinity-dl.py index 49fb1ea..38139d3 100644 --- a/furaffinity-dl.py +++ b/furaffinity-dl.py @@ -1,437 +1,38 @@ #!/usr/bin/python3 -import argparse import contextlib import http.cookiejar as cookielib -import json import os -import re from time import sleep import requests from bs4 import BeautifulSoup -from pathvalidate import sanitize_filename -from tqdm import tqdm - -# COLORS -WHITE = "\033[1;37m" -RED = "\033[1;91m" -GREEN = "\033[1;92m" -YELLOW = "\033[1;33m" -END = "\033[0m" - -# Argument parsing -parser = argparse.ArgumentParser( - formatter_class=argparse.RawTextHelpFormatter, - description="Downloads the entire gallery/scraps/folder/favorites \ -of a furaffinity user, or your submissions notifications", - epilog=""" -Examples: - python3 furaffinity-dl.py koul -> will download gallery of user koul - python3 furaffinity-dl.py koul scraps -> will download scraps of user koul - python3 furaffinity-dl.py mylafox favorites -> will download favorites \ -of user mylafox \n -You can also download a several users in one go like this: - python3 furaffinity-dl.py "koul radiquum mylafox" \ --> will download gallery of users koul -> radiquum -> mylafox -You can also provide a file with user names that are separated by a new line\n -You can also log in to FurAffinity in a web browser and load cookies to \ -download age restricted content or submissions: - python3 furaffinity-dl.py letodoesart -c cookies.txt -> will download \ -gallery of user letodoesart including Mature and Adult submissions - python3 furaffinity-dl.py --submissions -c cookies.txt -> will download your \ -submissions notifications \n -DISCLAIMER: It is your own responsibility to check whether batch downloading \ -is allowed by FurAffinity terms of service and to abide by them. -""", -) -parser.add_argument( - "username", - nargs="?", - help="username of the furaffinity \ -user", -) -parser.add_argument( - "category", - nargs="?", - help="the category to download, gallery/scraps/favorites \ -[default: gallery]", - default="gallery", -) -parser.add_argument( - "-sub", - "--submissions", - action="store_true", - help="download your \ -submissions", -) -parser.add_argument( - "-f", - "--folder", - nargs="+", - help="full path of the furaffinity gallery folder. for instance 123456/\ -Folder-Name-Here", -) -parser.add_argument( - "-c", "--cookies", nargs="+", help="path to a NetScape cookies file" -) -parser.add_argument( - "-ua", - "--user-agent", - dest="user_agent", - nargs="+", - default=[ - "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 \ -Firefox/101.0" - ], - help="Your browser's useragent, may be required, depending on your luck", -) -parser.add_argument( - "--start", "-s", default=[1], help="page number to start from", nargs="+" -) -parser.add_argument( - "--stop", - "-S", - default=[0], - nargs="+", - help="Page number to stop on. 
Specify the full URL after the username: for \ -favorites pages (1234567890/next) or for submissions pages: \ -(new~123456789@48)", -) -parser.add_argument( - "--redownload", - "-rd", - dest="dont_redownload", - action="store_false", - help="Redownload files that have been downloaded already", -) -parser.add_argument( - "--interval", - "-i", - type=int, - default=[0], - nargs="+", - help="delay between downloading pages in seconds [default: 0]", -) -parser.add_argument( - "--rating", - "-r", - action="store_false", - help="disable rating separation", -) -parser.add_argument( - "--filter", - action="store_true", - help="enable submission filter", -) -parser.add_argument( - "--metadata", - "-m", - action="store_true", - help="enable metadata saving", -) -parser.add_argument( - "--download", - help="download a specific submission /view/12345678/", -) -parser.add_argument( - "-jd", - "--json-description", - dest="json_description", - action="store_true", - help="download description as a JSON list", -) -parser.add_argument( - "--login", - action="store_true", - help="extract furaffinity cookies directly from your browser", -) -parser.add_argument( - "--check", - action="store_true", - help="check and download latest submissions of a user", -) -parser.add_argument( - "--output", - "-o", - dest="output_folder", - default="Submissions", - help="set a custom output folder", -) - -args = parser.parse_args() - -BASE_URL = "https://www.furaffinity.net" -if args.username != None: - username = args.username.split(" ") -category = args.category +import Modules.config as config +from Modules.functions import download_complete +from Modules.functions import download +from Modules.functions import login +from Modules.functions import next_button +from Modules.functions import system_message_handler # get session session = requests.session() -session.headers.update({"User-Agent": args.user_agent[0]}) +session.headers.update({"User-Agent": config.user_agent}) -if args.cookies is not None: # add cookies if present - cookies = cookielib.MozillaCookieJar(args.cookies[0]) +if config.cookies is not None: # add cookies if present + cookies = cookielib.MozillaCookieJar(config.cookies) cookies.load() session.cookies = cookies -# Functions - - -def download_file(url, fname, desc): - try: - r = session.get(url, stream=True) - if r.status_code != 200: - print( - f'{RED}Got a HTTP {r.status_code} while downloading \ -"{fname}". 
URL {url} ...skipping{END}' - ) - return False - - total = int(r.headers.get("Content-Length", 0)) - with open(fname, "wb") as file, tqdm( - desc=desc.ljust(40), - total=total, - miniters=100, - unit="b", - unit_scale=True, - unit_divisor=1024, - ) as bar: - for data in r.iter_content(chunk_size=1024): - size = file.write(data) - bar.update(size) - except KeyboardInterrupt: - print(f"{GREEN}Finished downloading{END}") - os.remove(fname) - exit() - - return True - - -def system_message_handler(s): - try: - message = { - s.find(class_="notice-message") - .find("div").find(class_="link-override").text.strip() - } - except AttributeError: - message = ( - s.find("section", class_="aligncenter notice-message") - .find("div", class_="section-body alignleft") - .find("div", class_="redirect-message") - .text.strip() - ) - print(f"{YELLOW}System Message: {message}{END}") - raise System_Message - - -def check_filter(title): - search = 'YCH[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\ -|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\ -|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE\ -|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\ -|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*ABLE\ -|AVAIL[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\ -|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLONE\ -|CLONE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\ -|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*LIM\ -|LIM[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\ -|COM[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\ -|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*COM\ -|COM[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE[^r]\ -|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*COM\ -|FIX[a-z $-/:-?{-~!"^_`\\[\\]]*ICE\ -|TELEGRAM[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\ -|TG[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\ -|REM[insder]*\\b\ -|\\bREF|\\bSale|auction|multislot|stream|adopt' - - match = re.search( - search, - title, - re.IGNORECASE, - ) - if match is not None and title == match.string: - return True - return None - - -def create_metadata(output, data, s, title, filename): - os.makedirs(f'{output}/metadata', exist_ok=True) - metadata = f"{output}/metadata/{title} - {filename}" - if args.rating is True: - os.makedirs(f'{output}/{data.get("rating")}/metadata', exist_ok=True) - metadata = f'{output}/{data.get("rating")}/metadata/{title} - {filename}' - - # Extract description as list - if args.json_description is True: - for desc in s.find("div", class_="submission-description").stripped_strings: - data["description"].append(desc) - - # Extact tags - - try: - for tag in s.find(class_="tags-row").findAll(class_="tags"): - data["tags"].append(tag.find("a").text) - except AttributeError: - print(f'{YELLOW}"{title}" has no tags{END}') - - # Extract comments - for comment in s.findAll(class_="comment_container"): - temp_ele = comment.find(class_="comment-parent") - parent_cid = None if temp_ele is None else int(temp_ele.attrs.get("href")[5:]) - # Comment is deleted or hidden - if comment.find(class_="comment-link") is None: - continue - - data["comments"].append( - { - "cid": int(comment.find(class_="comment-link").attrs.get("href")[5:]), - "parent_cid": parent_cid, - "content": comment.find(class_="comment_text").contents[0].strip(), - "username": comment.find(class_="comment_username").text, - "date": comment.find(class_="popup_date").attrs.get("title"), - } - ) - - # Write a UTF-8 encoded JSON file for metadata - with open(f"{metadata}.json", "w", encoding="utf-8") as f: - json.dump(data, f, ensure_ascii=False, indent=4) - - -def login(): - import browser_cookie3 - - CJ = browser_cookie3.load() - response = session.get(BASE_URL, cookies=CJ) - FA_COOKIES = CJ._cookies[".furaffinity.net"]["/"] - - cookie_a = FA_COOKIES["a"] - cookie_b 
= FA_COOKIES["b"] - - s = BeautifulSoup(response.text, "html.parser") - try: - s.find(class_="loggedin_user_avatar") - account_username = s.find(class_="loggedin_user_avatar").attrs.get("alt") - print(f"{GREEN}Logged in as: {account_username}{END}") - with open("cookies.txt", "w") as file: - file.write( - f"""# Netscape HTTP Cookie File -# http://curl.haxx.se/rfc/cookie_spec.html -# This is a generated file! Do not edit. -.furaffinity.net TRUE / TRUE {cookie_a.expires} a {cookie_a.value} -.furaffinity.net TRUE / TRUE {cookie_b.expires} b {cookie_b.value}""" - ) - print( - f'{GREEN}cookies saved successfully, now you can provide them \ -by using "-c cookies.txt"{END}' - ) - except AttributeError: - print( - f"{RED}Error getting cookies, either you need to login into \ -furaffinity in your browser, or you can export cookies.txt manually{END}" - ) - - exit() - - -# File downloading - - -class Check_Complete(Exception): - pass - -class System_Message(Exception): - pass - - -def download(path): - response = session.get(f"{BASE_URL}{path}") - s = BeautifulSoup(response.text, "html.parser") - - # System messages - if s.find(class_="notice-message") is not None: - system_message_handler(s) - - image = s.find(class_="download").find("a").attrs.get("href") - title = s.find(class_="submission-title").find("p").contents[0] - title = sanitize_filename(title) - dsc = s.find(class_="submission-description").text.strip().replace("\r\n", "\n") - - if args.json_description is True: - dsc = [] - filename = image.split("/")[-1:][0] - data = { - "id": int(path.split("/")[-2:-1][0]), - "filename": filename, - "author": s.find(class_="submission-id-sub-container") - .find("a") - .find("strong") - .text, - "date": s.find(class_="popup_date").attrs.get("title"), - "title": title, - "description": dsc, - "url": f"{BASE_URL}{path}", - "tags": [], - "category": s.find(class_="info").find(class_="category-name").text, - "type": s.find(class_="info").find(class_="type-name").text, - "species": s.find(class_="info").findAll("div")[2].find("span").text, - "gender": s.find(class_="info").findAll("div")[3].find("span").text, - "views": int(s.find(class_="views").find(class_="font-large").text), - "favorites": int(s.find(class_="favorites").find(class_="font-large").text), - "rating": s.find(class_="rating-box").text.strip(), - "comments": [], - } - if args.filter is True and check_filter(title) is True: - print(f'{YELLOW}"{title}" was filtered and will not be \ -downloaded - {data.get("url")}{END}') - return True - - image_url = f"https:{image}" - output = f"{args.output_folder}/{data.get('author')}" - if category != "gallery": - output = f"{args.output_folder}/{data.get('author')}/{category}" - if args.folder is not None: - output = f"{args.output_folder}/{data.get('author')}/{folder[1]}" - os.makedirs(output, exist_ok=True) - filename = sanitize_filename(filename) - global output_path - output_path = f"{output}/{title} - {filename}" - if args.rating is True: - os.makedirs(f'{output}/{data.get("rating")}', exist_ok=True) - output_path = f'{output}/{data.get("rating")}/{title} - {filename}' - - if args.dont_redownload is True and os.path.isfile(output_path): - if args.check is True: - print(f"{GREEN} Downloaded all recent files of \"{data.get('author')}\"") - raise Check_Complete - print(f'{YELLOW}Skipping "{title}" since it\'s already downloaded{END}') - return True - else: - download_file( - image_url, - output_path, - f'{title} - \ -[{data.get("rating")}]', - ) - - if args.metadata is True: - create_metadata(output, 
data, s, title, filename) - return True - - -# Main function - def main(): - page_end = args.stop[0] - page_num = args.start[0] - # download loop - with contextlib.suppress(Check_Complete, System_Message): + page_num = config.start + with contextlib.suppress(download_complete): while True: - if page_end == page_num: - print(f"{YELLOW}Reached page {page_end}, stopping.{END}") + if config.stop == page_num: + print( + f'{config.WARN_COLOR}Reached page "{config.stop}", stopping.{config.END}' + ) break page_url = f"{download_url}/{page_num}" @@ -444,99 +45,79 @@ def main(): # End of gallery if s.find(id="no-images") is not None: - print(f"{GREEN}End of gallery{END}") + print(f"{config.SUCCESS_COLOR}End of gallery{config.END}") break # Download all images on the page for img in s.findAll("figure"): download(img.find("a").attrs.get("href")) - sleep(args.interval[0]) + sleep(config.interval) - # Download submissions - if args.submissions is True: - try: - next_button = s.find("a", class_="button standard more").attrs.get( - "href" - ) - except AttributeError: - try: - next_button = s.find( - "a", class_="button standard more-half" - ).attrs.get("href") - except AttributeError: - print(f"{YELLOW}Unable to find next button{END}") - break - # unlike galleries that are sequentially numbered, submissions use a different scheme. - # the "page_num" is instead: new~[set of numbers]@(12 or 48 or 72) if sorting by new - - page_num = next_button.split("/")[-2] - page_url = f"{BASE_URL}{next_button}" - - elif args.category != "favorites": - next_button = s.find("button", class_="button standard", text="Next") - if next_button is None or next_button.parent is None: - print(f"{YELLOW}Unable to find next button{END}") - break - - page_num = next_button.parent.attrs["action"].split("/")[-2] - else: - next_button = s.find("a", class_="button standard right", text="Next") - if next_button is None: - print(f"{YELLOW}Unable to find next button{END}") - break - - # unlike galleries that are sequentially numbered, favorites use a different scheme. 
- # the "page_num" is instead: [set of numbers]/next (the trailing /next is required) - - next_page_link = next_button.attrs["href"] - next_fav_num = re.search(r"\d+", next_page_link) - - if next_fav_num is None: - print(f"{YELLOW}Failed to parse next favorite link{END}") - break - - page_num = f"{next_fav_num[0]}/next" - - print(f"{WHITE}Downloading page {page_num} - {page_url} {END}") - print( - f"{GREEN}Finished \ -downloading{END}" - ) + page_num = next_button(page_url) if __name__ == "__main__": - if args.login is True: - login() + if config.login is True: + login(config.user_agent) + + try: + response = session.get(config.BASE_URL) + except ConnectionError: + print(f"{config.ERROR_COLOR}Connection failed{config.END}") + exit() + except KeyboardInterrupt: + print(f"{config.WARN_COLOR}Aborted by user{config.END}") + exit() - response = session.get(BASE_URL) s = BeautifulSoup(response.text, "html.parser") if s.find(class_="loggedin_user_avatar") is not None: account_username = s.find(class_="loggedin_user_avatar").attrs.get("alt") - print(f'{GREEN}Logged in as "{account_username}"{END}') + print(f'{config.SUCCESS_COLOR}Logged in as "{account_username}"{config.END}') else: - print(f"{YELLOW}Not logged in, NSFW content is inaccessible{END}") + print( + f"{config.WARN_COLOR}Not logged in, NSFW content is inaccessible{config.END}" + ) - if args.download is not None: - download(args.download) - print(f'{GREEN}File saved as "{output_path}" {END}') + if config.download is not None: + download(config.download) exit() - if args.submissions is True: - download_url = f"{BASE_URL}/msg/submissions" - main() - exit() - - if args.folder is not None: - folder = args.folder[0].split("/") - download_url = f"{BASE_URL}/gallery/{username[0]}/folder/{args.folder[0]}" + if config.submissions is True: + download_url = f"{config.BASE_URL}/msg/submissions" main() + print( + f"{config.SUCCESS_COLOR}Finished \ +downloading submissions{config.END}" + ) exit() - if os.path.exists(username[0]): - data = open(username[0]).read() - username = filter(None, data.split("\n")) - - for username in username: - print(f'{GREEN}Now downloading "{username}"{END}') - download_url = f"{BASE_URL}/{category}/{username}" + if config.folder is not None: + folder = config.folder.split("/") + download_url = ( + f"{config.BASE_URL}/gallery/{config.username}/folder/{config.folder[1]}" + ) main() + print( + f'{config.SUCCESS_COLOR}Finished \ +downloading "{config.folder[1]}"{config.END}' + ) + exit() + + if config.category not in ["gallery", "scraps", "favorites"]: + print( + f"{config.ERROR_COLOR}Please enter a valid category [gallery/scraps/favorites] {config.END}" + ) + exit() + + if os.path.exists(config.username[0]): + data = open(config.username[0]).read() + config.username = filter(None, data.split("\n")) + + for username in config.username: + print(f'{config.SUCCESS_COLOR}Now downloading "{username}"{config.END}') + download_url = f"{config.BASE_URL}/{config.category}/{username}" + main() + print( + f'{config.SUCCESS_COLOR}Finished \ +downloading "{username}"{config.END}' + ) diff --git a/requirements.txt b/requirements.txt index aed3318..9cd4afa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ requests tqdm browser-cookie3 pathvalidate +pre-commit