From 04a09f07ad39d3e3c56998bd226ce0aa24d0a867 Mon Sep 17 00:00:00 2001
From: Radiquum <68120017+Radiquum@users.noreply.github.com>
Date: Thu, 16 Jun 2022 07:06:42 +0500
Subject: [PATCH] commits squash

added:
- filter
- remove not fully downloaded file when exiting with CTRL + C
- ability to download description as a list
- ability to download a specific file

changed:
- color output
- remove custom output directory
- directory tree is now based on username, category and rating
- default attribute value changes
- system message parsing changes
- argument & help text changes
- move metadata var to section where metadata == True
- add page URL to metadata file
- some minor changes
---
 .gitignore        |   7 +-
 README.md         |  54 +++----
 furaffinity-dl.py | 363 +++++++++++++++++++++++++++-------------------
 3 files changed, 245 insertions(+), 179 deletions(-)
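Reviewer note (this sits below the diffstat, so `git am` ignores it): the CTRL + C behaviour listed above boils down to wrapping the streaming download in a `try`/`except KeyboardInterrupt` that deletes the partial file before exiting. A minimal sketch of that pattern — the function name and arguments here are hypothetical, only the structure mirrors the patch:

```python
import os
import sys

import requests


def fetch(session: requests.Session, url: str, fname: str) -> None:
    """Minimal sketch of the interrupt-safe download added in this patch."""
    try:
        r = session.get(url, stream=True)
        with open(fname, "wb") as file:
            # stream in small chunks so an interrupt leaves at most one partial file
            for chunk in r.iter_content(chunk_size=1024):
                file.write(chunk)
    except KeyboardInterrupt:
        os.remove(fname)  # drop the half-written file before exiting
        sys.exit(1)
```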
diff --git a/.gitignore b/.gitignore
index 2396877..19621b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,9 @@ cookies.txt
 *.gif
 *.swf
 
-# code monitoring stuff
+# Download folder
+furaffinity-dl/
+
+# vscode stuff
 .vscode
-ignore
\ No newline at end of file
+ignore

diff --git a/README.md b/README.md
index 8946391..28b6f95 100644
--- a/README.md
+++ b/README.md
@@ -24,47 +24,51 @@ When downloading a folder make sure to put everything after **/folder/**, for ex
 
 ```help
-usage: furaffinity-dl.py [-h] [--category [CATEGORY]] [--submissions] [--folder FOLDER] [--output OUTPUT] [--cookies COOKIES]
-                         [--user-agent [UA]] [--start [START]] [--stop STOP] [--dont-redownload] [--interval INTERVAL] [--rating]
-                         [--metadata]
-                         username
+usage: furaffinity-dl.py [-h] [--submissions] [--folder FOLDER [FOLDER ...]] [--cookies COOKIES [COOKIES ...]]
+                         [--user-agent USER_AGENT [USER_AGENT ...]] [--start START [START ...]] [--stop STOP [STOP ...]] [--dont-redownload]
+                         [--interval INTERVAL [INTERVAL ...]] [--rating] [--filter] [--metadata] [--download DOWNLOAD] [--json-description]
+                         [username] [category]
 
-Downloads the entire gallery/scraps/favorites of a furaffinity user, or your submissions
+Downloads the entire gallery/scraps/folder/favorites of a furaffinity user, or your submission notifications
 
 positional arguments:
-  username              username of the furaffinity user [required]
+  username              username of the furaffinity user
+  category              the category to download, gallery/scraps/favorites [default: gallery]
 
 options:
   -h, --help            show this help message and exit
-  --category [CATEGORY], -ca [CATEGORY]
-                        the category to download, gallery/scraps/favorites [default: gallery]
-  --submissions, -su    download your submissions
-  --folder FOLDER       full path of the furaffinity folder. for instance 123456/Folder-Name-Here
-  --output OUTPUT       output directory [default: furaffinity-dl]
-  --cookies COOKIES, -c COOKIES
+  --submissions         download your submissions
+  --folder FOLDER [FOLDER ...]
+                        full path of the furaffinity gallery folder. for instance 123456/Folder-Name-Here
+  --cookies COOKIES [COOKIES ...], -c COOKIES [COOKIES ...]
                         path to a NetScape cookies file
-  --user-agent [UA], -u [UA]
+  --user-agent USER_AGENT [USER_AGENT ...]
                         Your browser's useragent, may be required, depending on your luck
-  --start [START], -s [START]
+  --start START [START ...], -s START [START ...]
                         page number to start from
-  --stop STOP, -S STOP  Page number to stop on. For favorites pages, specify the full URL after the username (1234567890/next).
+  --stop STOP [STOP ...], -S STOP [STOP ...]
+                        Page number to stop on. Specify the full URL after the username: for favorites pages (1234567890/next) or for submissions pages (new~123456789@48)
   --dont-redownload, -d
-                        Don't redownload files that have already been downloaded [default: true]
-  --interval INTERVAL, -i INTERVAL
+                        Allow redownloading files that have already been downloaded
+  --interval INTERVAL [INTERVAL ...], -i INTERVAL [INTERVAL ...]
                         delay between downloading pages in seconds [default: 0]
-  --rating, -r          enable rating separation [default: true]
-  --metadata, -m        enable downloading of metadata [default: false]
+  --rating, -r          disable rating separation
+  --filter              disable submission filter
+  --metadata, -m        enable downloading of metadata
+  --download DOWNLOAD   download a specific submission /view/12345678/
+  --json-description    download description as a JSON list
 
 Examples:
- python3 furaffinity-dl.py koul koul_gallery
- python3 furaffinity-dl.py -o koulsArt gallery koul
- python3 furaffinity-dl.py -o mylasFavs --category favorites mylafox
+ python3 furaffinity-dl.py koul -> will download the gallery of user koul
+ python3 furaffinity-dl.py koul scraps -> will download the scraps of user koul
+ python3 furaffinity-dl.py mylafox favorites -> will download the favorites of user mylafox
 
-You can also log in to FurAffinity in a web browser and load cookies to download Age restricted content or Submissions:
- python3 furaffinity-dl.py -c cookies.txt gallery letodoesart
- python3 furaffinity-dl.py -c cookies.txt --submissions
+You can also log in to FurAffinity in a web browser and load cookies to download age restricted content or submissions:
+ python3 furaffinity-dl.py letodoesart -c cookies.txt -> will download the gallery of user letodoesart including Mature and Adult submissions
+ python3 furaffinity-dl.py --submissions -c cookies.txt -> will download your submission notifications
 
 DISCLAIMER: It is your own responsibility to check whether batch downloading is allowed by FurAffinity terms of service and to abide by them.
+
 ```
 
 You can also log in to download restricted content. To do that, log in to FurAffinity in your web browser, export cookies to a file from your web browser in Netscape format (there are extensions to do that [for Firefox](https://addons.mozilla.org/en-US/firefox/addon/ganbo/) and [for Chrome based browsers](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid?hl=en)), you can then pass them to the script with the `-c` flag, like this (you may also have to provide your user agent):
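An aside on the README change above (a reading aid, not part of the patch): the cookie flow it describes is just `http.cookiejar.MozillaCookieJar` feeding a `requests` session, exactly as the script does; `cookies.txt` here is a hypothetical path:

```python
import http.cookiejar as cookielib  # same alias the script uses

import requests

session = requests.session()
cookies = cookielib.MozillaCookieJar("cookies.txt")  # exported Netscape-format file
cookies.load()
session.cookies = cookies
# session now sends the logged-in FurAffinity cookies with every request
```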
diff --git a/furaffinity-dl.py b/furaffinity-dl.py
index 8de10c3..133922f 100644
--- a/furaffinity-dl.py
+++ b/furaffinity-dl.py
@@ -10,195 +10,221 @@ import requests
 from bs4 import BeautifulSoup
 from tqdm import tqdm
 
+# COLORS
+WHITE = "\033[1;37m"
+RED = "\033[1;91m"
+GREEN = "\033[1;92m"
+YELLOW = "\033[1;33m"
+END = "\033[0m"
+
 # Argument parsing
 parser = argparse.ArgumentParser(
     formatter_class=argparse.RawTextHelpFormatter,
-    description="Downloads the entire gallery/scraps/favorites of a furaffinity user, or your submissions",
+    description="Downloads the entire gallery/scraps/folder/favorites of a furaffinity user, or your submission notifications",
     epilog="""
 Examples:
- python3 furaffinity-dl.py koul koul_gallery
- python3 furaffinity-dl.py -o koulsArt gallery koul
- python3 furaffinity-dl.py -o mylasFavs --category favorites mylafox\n
-You can also log in to FurAffinity in a web browser and load cookies to download Age restricted content or Submissions:
- python3 furaffinity-dl.py -c cookies.txt gallery letodoesart
- python3 furaffinity-dl.py -c cookies.txt --submissions\n
+ python3 furaffinity-dl.py koul -> will download the gallery of user koul
+ python3 furaffinity-dl.py koul scraps -> will download the scraps of user koul
+ python3 furaffinity-dl.py mylafox favorites -> will download the favorites of user mylafox \n
+You can also log in to FurAffinity in a web browser and load cookies to download age restricted content or submissions:
+ python3 furaffinity-dl.py letodoesart -c cookies.txt -> will download the gallery of user letodoesart including Mature and Adult submissions
+ python3 furaffinity-dl.py --submissions -c cookies.txt -> will download your submission notifications \n
 DISCLAIMER: It is your own responsibility to check whether batch downloading is allowed by FurAffinity terms of service and to abide by them.
 """,
 )
-
-# General stuff
+parser.add_argument("username", nargs="?", help="username of the furaffinity user")
 parser.add_argument(
-    "--category",
-    "-ca",
-    type=str,
+    "category",
+    nargs="?",
     help="the category to download, gallery/scraps/favorites [default: gallery]",
-    const=1,
     default="gallery",
 )
 parser.add_argument(
-    "--submissions",
-    "-su",
-    action="store_true",
-    help="download your submissions",
-)
-parser.add_argument(
-    "username",
-    type=str,
-    help="username of the furaffinity user [required]",
+    "--submissions", action="store_true", help="download your submissions"
 )
 parser.add_argument(
     "--folder",
-    type=str,
-    help="full path of the furaffinity folder. for instance 123456/Folder-Name-Here",
+    nargs="+",
+    help="full path of the furaffinity gallery folder. for instance 123456/Folder-Name-Here",
 )
 parser.add_argument(
-    "--output", type=str, default="furaffinity-dl", help="output directory [default: furaffinity-dl]"
-)
-parser.add_argument(
-    "--cookies",
-    "-c",
-    dest="cookies",
-    type=str,
-    help="path to a NetScape cookies file",
+    "--cookies", "-c", nargs="+", help="path to a NetScape cookies file"
 )
 parser.add_argument(
     "--user-agent",
-    "-u",
-    dest="ua",
-    type=str,
-    nargs="?",
-    default="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.7) Gecko/20100101 Firefox/68.7",
+    dest="user_agent",
+    nargs="+",
+    default="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 Firefox/101.0",
     help="Your browser's useragent, may be required, depending on your luck",
 )
 parser.add_argument(
-    "--start", "-s", type=str, default=1, help="page number to start from", nargs="?"
+    "--start", "-s", default=[1], help="page number to start from", nargs="+"
 )
 parser.add_argument(
     "--stop",
     "-S",
-    dest="stop",
-    type=str,
-    help="Page number to stop on. For favorites pages, specify the full URL after the username (1234567890/next).",
+    default=[0],
+    nargs="+",
+    help="Page number to stop on. Specify the full URL after the username: for favorites pages (1234567890/next) or for submissions pages (new~123456789@48)",
 )
 parser.add_argument(
     "--dont-redownload",
     "-d",
     action="store_false",
-    help="Don't redownload files that have already been downloaded [default: true]",
+    help="Allow redownloading files that have already been downloaded",
 )
 parser.add_argument(
     "--interval",
     "-i",
-    dest="interval",
-    type=float,
-    default=0,
+    type=int,
+    default=[0],
+    nargs="+",
     help="delay between downloading pages in seconds [default: 0]",
 )
 parser.add_argument(
     "--rating",
     "-r",
     action="store_false",
-    help="enable rating separation [default: true]",
+    help="disable rating separation",
+)
+parser.add_argument(
+    "--filter",
+    action="store_false",
+    help="disable submission filter",
 )
 parser.add_argument(
     "--metadata",
     "-m",
     action="store_true",
-    help="enable downloading of metadata [default: false]",
+    help="enable downloading of metadata",
+)
+parser.add_argument(
+    "--download",
+    help="download a specific submission /view/12345678/",
+)
+parser.add_argument(
+    "--json-description",
+    dest="json_description",
+    action="store_true",
+    help="download description as a JSON list",
 )
 
 args = parser.parse_args()
 
 BASE_URL = "https://www.furaffinity.net"
-categories = {
-    "gallery": "gallery",
-    "scraps": "scraps",
-    "favorites": "favorites",
-}
-category = categories.get(args.category)
-if category is None:
-    print("please enter a valid category")
-    exit()
-if args.username is None:
-    print("please enter a FA Username")
-    exit()
-if args.output is None:
-    print("please enter a output folder")
-    exit()
-
 username = args.username
-output = f"{args.output}/{args.username}"
-metadata = f"{output}/metadata"
-filter = {"YCH Open", "Reminder", "YCH Closed", "Auction"}
+if args.submissions is False and args.download is None:  # not downloading submissions or a specific post
+    categories = {
+        "gallery": "gallery",
+        "scraps": "scraps",
+        "favorites": "favorites",
+    }
+    category = categories.get(args.category)
+    if args.username is None:
+        print(f"{RED} please enter a FA Username{END}")
+        exit()
+    if category is None:
+        print(f"{RED} please enter a valid category gallery/scraps/favorites{END}")
+        exit()
+    download_url = f"{BASE_URL}/{category}/{username}"
+    output = f"furaffinity-dl/{category}/{username}"
+
+# get session
 session = requests.session()
-session.headers.update({"User-Agent": args.ua})
+session.headers.update({"User-Agent": args.user_agent[0]})
 
-if args.cookies is not None:
-    cookies = cookielib.MozillaCookieJar(args.cookies)
+if args.cookies is not None:  # add cookies if present
+    cookies = cookielib.MozillaCookieJar(args.cookies[0])
     cookies.load()
     session.cookies = cookies
 
-
+# File downloading
 def download_file(url, fname, desc):
-    r = session.get(url, stream=True)
-    if r.status_code != 200:
-        print(f"Got a HTTP {r.status_code} while downloading {fname}; ...skipping")
-        return False
+    try:
+        r = session.get(url, stream=True)
+        if r.status_code != 200:
+            print(
+                f"{RED} Got a HTTP {r.status_code} while downloading {fname}; ...skipping{END}"
+            )
+            return False
+
+        total = int(r.headers.get("Content-Length", 0))
+        with open(fname, "wb") as file, tqdm(
+            desc=desc.ljust(60)[:60],
+            total=total,
+            miniters=100,
+            unit="b",
+            unit_scale=True,
+            unit_divisor=1024,
+        ) as bar:
+            for data in r.iter_content(chunk_size=1024):
+                size = file.write(data)
+                bar.update(size)
+    except KeyboardInterrupt:
+        print(f"{YELLOW} Download interrupted, removing incomplete file{END}")
+        os.remove(fname)
+        exit()
 
-    total = int(r.headers.get("Content-Length", 0))
-    with open(fname, "wb") as file, tqdm(
-        desc=desc.ljust(40)[:40],
-        total=total,
-        miniters=100,
-        unit="b",
-        unit_scale=True,
-        unit_divisor=1024,
-    ) as bar:
-        for data in r.iter_content(chunk_size=1024):
-            size = file.write(data)
-            bar.update(size)
     return True
 
 
-download_url = f"{BASE_URL}/{category}/{username}"
+# check if you are downloading a folder or submissions
 if args.folder is not None:
-    download_url = f"{BASE_URL}/gallery/{username}/folder/{args.folder}"
-if args.submissions is True:
+    folder = args.folder[0].split("/")
+    download_url = f"{BASE_URL}/gallery/{username}/folder/{args.folder[0]}"
+    output = f"furaffinity-dl/folders/{username}/{folder[1]}"
+if args.submissions is True:
     download_url = f"{BASE_URL}/msg/submissions"
 
-
 def download(path):
     response = session.get(f"{BASE_URL}{path}")
     s = BeautifulSoup(response.text, "html.parser")
-
+    # System messages
     if s.find(class_="notice-message") is not None:
-        message = (
-            s.find(class_="notice-message")
-            .find("div")
-            .find(class_="link-override")
-            .text.strip()
-        )
-        raise Exception("System Message", message)
+        try:
+            message = (
+                s.find(class_="notice-message")
+                .find("div")
+                .find(class_="link-override")
+                .text.strip()
+                .replace(".", ". \n")
+            )
+        except AttributeError:
+            message = (
+                s.find(class_="notice-message")
+                .find("div", class_="section-body alignleft")
+                .find("div", class_="redirect-message")
+                .text.strip()
+                .replace(".", ". \n")
+            )
+        print(f"{YELLOW} System Message: {message}{END}")
+        exit()
 
     image = s.find(class_="download").find("a").attrs.get("href")
-    title = s.find(class_="submission-title").find("p").contents[0]
+    title = s.find(class_="submission-title").find("p").contents[0] + " "
+    description = s.find(class_="submission-description").text.strip().replace("\r\n", "\n")
+    author = s.find(class_="submission-id-sub-container").find("a").find("strong").text
+    if args.submissions is True:
+        global output
+        output = f"furaffinity-dl/gallery/{author}"
+
+    if args.json_description is True:
+        description = []
     filename = image.split("/")[-1:][0]
     data = {
         "id": int(path.split("/")[-2:-1][0]),
         "filename": filename,
-        "author": s.find(class_="submission-id-sub-container")
-        .find("a")
-        .find("strong")
-        .text,
+        "author": author,
         "date": s.find(class_="popup_date").attrs.get("title"),
         "title": title,
-        "description": s.find(class_="submission-description")
-        .text.strip()
-        .replace("\r\n", "\n"),
+        "description": description,
+        "url": f"{BASE_URL}{path}",
         "tags": [],
         "category": s.find(class_="info").find(class_="category-name").text,
         "type": s.find(class_="info").find(class_="type-name").text,
@@ -210,39 +236,49 @@ def download(path):
         "comments": [],
     }
 
-    match = re.search(
-        "reminder|auction|YCH Open|YCH Closed|Adopt|pre-order", title, re.IGNORECASE
-    )
-    if match is not None and title == match.string:
-        print(f"post {title} was filtered and will not be downloaded")
-        return True
-
-    # Extact tags
-    try:
-        for tag in s.find(class_="tags-row").findAll(class_="tags"):
-            data["tags"].append(tag.find("a").text)
-    except AttributeError:
-        print(f'post: "{title}", has no tags')
+    if args.filter is True:
+        match = re.search(
+            "YCH[a-z $-/:-?{-~!\"^_`\[\]]*OPEN|OPEN[a-z $-/:-?{-~!\"^_`\[\]]*YCH|YCH[a-z $-/:-?{-~!\"^_`\[\]]*CLOSE|CLOSE[a-z $-/:-?{-~!\"^_`\[\]]*YCH|YCH[a-z $-/:-?{-~!\"^_`\[\]]*ABLE|AVAIL[a-z $-/:-?{-~!\"^_`\[\]]*YCH|YCH[a-z $-/:-?{-~!\"^_`\[\]]*CLONE|CLONE[a-z $-/:-?{-~!\"^_`\[\]]*YCH|YCH[a-z $-/:-?{-~!\"^_`\[\]]*LIM|LIM[a-z $-/:-?{-~!\"^_`\[\]]*YCH|COM[a-z $-/:-?{-~!\"^_`\[\]]*OPEN|OPEN[a-z $-/:-?{-~!\"^_`\[\]]*COM|COM[a-z $-/:-?{-~!\"^_`\[\]]*CLOSE|CLOSE[a-z $-/:-?{-~!\"^_`\[\]]*COM|FIX[a-z $-/:-?{-~!\"^_`\[\]]*ICE|REM[insder]*\W|\\bREF|Sale$|auction|multislot|stream|adopt",
+            title,
+            re.IGNORECASE,
+        )
+        if match is not None and title == match.string:
+            print(
+                f"{YELLOW} post {title} was filtered and will not be downloaded - {data.get('url')}{END}"
+            )
+            return True
 
     image_url = f"https:{image}"
+    os.makedirs(output, exist_ok=True)
     output_path = f"{output}/{filename}"
     if args.rating is True:
         os.makedirs(f'{output}/{data.get("rating")}', exist_ok=True)
         output_path = f'{output}/{data.get("rating")}/{filename}'
-    if args.folder is not None:
-        os.makedirs(f"{output}/{args.folder}", exist_ok=True)
-        output_path = f"{output}/{args.folder}/{filename}"
-        if args.rating is True:
-            os.makedirs(f'{output}/{args.folder}/{data.get("rating")}', exist_ok=True)
-            output_path = f'{output}/{args.folder}/{data.get("rating")}/{filename}'
 
     if args.dont_redownload is True and os.path.isfile(output_path):
-        print(f'Skipping "{title}", since it\'s already downloaded')
+        print(f'{YELLOW} Skipping "{title}", since it\'s already downloaded{END}')
     else:
         download_file(image_url, output_path, title)
 
     if args.metadata is True:
+
+        metadata = f"{output}/metadata"
+
+        # Extract description as list
+        if args.json_description is True:
+            for desc in s.find("div", class_="submission-description").strings:
+                description = desc.strip()
+                data["description"].append(description)
+
+        # Extract tags
+        try:
+            for tag in s.find(class_="tags-row").findAll(class_="tags"):
+                data["tags"].append(tag.find("a").text)
+        except AttributeError:
+            print(f'{YELLOW} post: "{title}", has no tags{END}')
+
         # Extract comments
         for comment in s.findAll(class_="comment_container"):
             temp_ele = comment.find(class_="comment-parent")
@@ -274,23 +310,30 @@ def download(path):
 
     return True
 
-
-# Main downloading loop
+if args.download is not None:
+    output = "furaffinity-dl/downloaded/"
+    download(args.download)
+    print(f"{GREEN} File downloaded{END}")
+    exit()
+
+# Main function
 def main():
-    page_end = args.stop
-    page_num = args.start
+    # check if you are logged in
+    page_end = args.stop[0]
+    page_num = args.start[0]
     page_url = f"{download_url}/{page_num}"
     response = session.get(page_url)
     s = BeautifulSoup(response.text, "html.parser")
     if s.find(class_="loggedin_user_avatar") is not None:
         account_username = s.find(class_="loggedin_user_avatar").attrs.get("alt")
-        print("Logged in as", account_username)
+        print(f"{GREEN} Logged in as: {account_username}{END}")
     else:
-        print("Not logged in, NSFW content is inaccessible")
+        print(f"{YELLOW} Not logged in, NSFW content is inaccessible{END}")
 
+    # download loop
     while True:
         if page_end == page_num:
-            print(f"Reached page {page_end}, stopping.")
+            print(f"{YELLOW} Reached page {page_end}, stopping.{END}")
             break
 
         page_url = f"{download_url}/{page_num}"
@@ -305,50 +348,66 @@ def main():
                     .find("div")
                     .find(class_="link-override")
                     .text.strip()
+                    .replace(".", ". \n")
                 )
             except AttributeError:
-                print("You didn't provide cookies to log in")
-                break
-            raise Exception("System Message", message)
+                message = (
+                    s.find(class_="notice-message")
+                    .find("div", class_="section-body alignleft")
+                    .find("div", class_="redirect-message")
+                    .text.strip()
+                    .replace(".", ". \n")
+                )
+            print(f"{YELLOW} System Message: {message}{END}")
+            exit()
 
         # End of gallery
         if s.find(id="no-images") is not None:
-            print("End of gallery")
+            print(f"{GREEN} End of gallery{END}")
             break
 
         # Download all images on the page
         for img in s.findAll("figure"):
             download(img.find("a").attrs.get("href"))
-            sleep(args.interval)
+            sleep(args.interval[0])
 
+        # Download submissions
         if args.submissions is True:
-            next_button = s.find("a", class_="button standard more", text="Next 48")
-            if next_button is None or next_button.parent is None:
-                next_button = s.find(
-                    "a", class_="button standard more-half", text="Next 48"
+            try:
+                next_button = s.find("a", class_="button standard more").attrs.get(
+                    "href"
                 )
-            if next_button is None or next_button.parent is None:
-                print("Unable to find next button")
+            except AttributeError:
+                try:
+                    next_button = s.find(
+                        "a", class_="button standard more-half"
+                    ).attrs.get("href")
+                except AttributeError:
+                    print(f"{YELLOW} Unable to find next button{END}")
+                    break
 
-            next_page_link = next_button.attrs["href"]
-            page_num = next_page_link.split("/")[-2]
-            page_url = BASE_URL + next_page_link
+            # unlike galleries that are sequentially numbered, submissions use a different scheme:
+            # the "page_num" is instead new~[set of numbers]@(12, 48 or 72) when sorting by new
 
-            print("Downloading page", page_num, page_url)
+            page_num = next_button.split("/")[-2]
+            page_url = f"{BASE_URL}{next_button}"
+
+            print(f"{WHITE} Downloading page {page_num} - {page_url} {END}")
+        # Download everything else
         elif args.category != "favorites":
             next_button = s.find("button", class_="button standard", text="Next")
             if next_button is None or next_button.parent is None:
-                print("Unable to find next button")
+                print(f"{YELLOW} Unable to find next button{END}")
                 break
 
             page_num = next_button.parent.attrs["action"].split("/")[-2]
 
-            print("Downloading page", page_num, page_url)
+            print(f"{WHITE} Downloading page {page_num} - {page_url} {END}")
+        # Download favorites
         else:
-            next_button = s.find("a", class_="button mobile-button right", text="Next")
+            next_button = s.find("a", class_="button standard right", text="Next")
             if next_button is None:
-                print("Unable to find next button")
+                print(f"{YELLOW} Unable to find next button{END}")
                 break
 
             # unlike galleries that are sequentially numbered, favorites use a different scheme.
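Reviewer aside between hunks (not part of the diff): the three pagination schemes handled above differ only in where the next-page token lives. A minimal sketch with hypothetical links:

```python
import re

# gallery/scraps: plain page numbers in the form action, e.g. /gallery/koul/3/
gallery_action = "/gallery/koul/3/"  # hypothetical form action
print(gallery_action.split("/")[-2])  # -> "3"

# submissions: opaque token of sort key + page size, e.g. new~123456789@48
submissions_href = "/msg/submissions/new~123456789@48/"  # hypothetical href
print(submissions_href.split("/")[-2])  # -> "new~123456789@48"

# favorites: id of the last favorite on the page, turned into "<id>/next"
fav_href = "/favorites/koul/1234567890/next"  # hypothetical href
fav_id = re.search(r"\d+", fav_href)
print(fav_id.group(0) + "/next")  # -> "1234567890/next"
```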
@@ -358,16 +417,16 @@ def main():
             next_fav_num = re.search(r"\d+", next_page_link)
 
             if next_fav_num is None:
-                print("Failed to parse next favorite link.")
+                print(f"{YELLOW} Failed to parse next favorite link{END}")
                 break
 
             page_num = next_fav_num.group(0) + "/next"
 
             # parse it into numbers/next
 
-            print("Downloading page", page_num, page_url)
+            print(f"{WHITE} Downloading page {page_num} - {page_url} {END}")
 
-    print("Finished downloading")
+    print(f"{GREEN}Finished downloading{END}")
 
 
 if __name__ == "__main__":
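A closing note for reviewers (not part of the patch): the new title filter is easiest to sanity-check in isolation. A minimal sketch with a simplified stand-in pattern and hypothetical titles; the real pattern in the diff also covers CLOSE/ABLE/CLONE/LIM/COM/FIX-ICE/REF/multislot/stream and punctuation between words:

```python
import re

# Simplified stand-in for the patch's filter pattern (illustration only).
FILTER = re.compile(
    r"YCH[ a-z]*OPEN|OPEN[ a-z]*YCH|REM[insder]*\W|auction|adopt",
    re.IGNORECASE,
)

for title in ("YCH open 3/5", "Reminder!!", "Fox portrait"):  # hypothetical titles
    # the patch skips a post when the filter pattern matches its title
    print(title, "->", "filtered" if FILTER.search(title) else "kept")
```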