From 6e51fffed7a8776fa00fe58fdbd69bec09298da9 Mon Sep 17 00:00:00 2001
From: Radiquum
Date: Thu, 16 Jun 2022 06:41:26 +0500
Subject: [PATCH] Rewrite parts of the code

Create directory tree based on author name
Filter posts like "YCH OPEN, REMINDER"
Argument parser changes
---
 .gitignore              |   6 +-
 .pre-commit-config.yaml |  43 ++++
 LICENSE                 |   2 +-
 README.md               |  45 ++--
 furaffinity-dl.py       | 491 ++++++++++++++++++++++++----------------
 5 files changed, 376 insertions(+), 211 deletions(-)
 create mode 100644 .pre-commit-config.yaml
 mode change 100755 => 100644 furaffinity-dl.py

diff --git a/.gitignore b/.gitignore
index d822ee0..2396877 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,8 @@ cookies.txt
 *.jpg
 *.json
 *.gif
-*.swf
\ No newline at end of file
+*.swf
+
+# editor config and scratch files
+.vscode
+ignore
\ No newline at end of file
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..16cc81d
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,43 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+repos:
+  - repo: https://github.com/psf/black
+    rev: 22.3.0
+    hooks:
+      - id: black
+        args: [--safe]
+
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.3.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-added-large-files
+      - id: debug-statements
+        language_version: python3
+
+  - repo: https://github.com/PyCQA/flake8
+    rev: 4.0.1
+    hooks:
+      - id: flake8
+        language_version: python3
+
+  - repo: https://github.com/asottile/reorder_python_imports
+    rev: v3.1.0
+    hooks:
+      - id: reorder-python-imports
+        args: [--application-directories=.:src, --py39-plus]
+
+  - repo: https://github.com/asottile/pyupgrade
+    rev: v2.34.0
+    hooks:
+      - id: pyupgrade
+        args: [--py39-plus]
+
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v0.961
+    hooks:
+      - id: mypy
+        files: ^src/
+        args: []
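The new `.pre-commit-config.yaml` above wires up the formatting and lint hooks used for this rewrite. To run the same checks locally, the standard invocation (assuming `pre-commit` is installed from PyPI) is:

```
pip install pre-commit
pre-commit install          # run the hooks automatically on every git commit
pre-commit run --all-files  # one-off run over the whole repository
```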
diff --git a/LICENSE b/LICENSE
index 6282edb..64ba10c 100644
--- a/LICENSE
+++ b/LICENSE
@@ -4,4 +4,4 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of
 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README.md b/README.md
index 2a8c7c4..8946391 100644
--- a/README.md
+++ b/README.md
@@ -4,12 +4,15 @@ This branch is the development version of furaffinity-dl rewritten in python.
 **furaffinity-dl** is a python script for batch downloading of galleries (and scraps/favourites) from furaffinity users, or your submissions! It was written for preservation of culture, to counter the people nuking their galleries every once in a while.
+It was later modified for convenience.

 Supports all known submission types: images, text, flash and audio.

 ## Requirements

-`pip3 install -r requirements.txt`
+Python 3
+
+`pip install -r requirements.txt`

 **The script currently only works with the "Modern" theme**
@@ -21,48 +24,50 @@ When downloading a folder make sure to put everything after **/folder/**, for ex
 ```help
-usage: furaffinity-dl.py [-h] [--output OUTPUT] [--cookies COOKIES] [--ua UA] [--start START] [--stop STOP] [--dont-redownload]
-                         [--interval INTERVAL] [--metadir METADIR]
-                         [category] [username] [folder]
+usage: furaffinity-dl.py [-h] [--category [CATEGORY]] [--submissions] [--folder FOLDER] [--output OUTPUT] [--cookies COOKIES]
+                         [--user-agent [UA]] [--start [START]] [--stop STOP] [--dont-redownload] [--interval INTERVAL] [--rating]
+                         [--metadata]
+                         username

 Downloads the entire gallery/scraps/favorites of a furaffinity user, or your submissions

 positional arguments:
-  category              the category to download, gallery/scraps/favorites
-  username              username of the furaffinity user
-  folder                name of the folder (full path, for instance 123456/Folder-Name-Here)
+  username              username of the furaffinity user [required]

 options:
   -h, --help            show this help message and exit
-  --output OUTPUT, -o OUTPUT
-                        output directory
+  --category [CATEGORY], -ca [CATEGORY]
+                        the category to download, gallery/scraps/favorites [default: gallery]
+  --submissions, -su    download your submissions
+  --folder FOLDER       full path of the furaffinity folder, for instance 123456/Folder-Name-Here
+  --output OUTPUT, -o OUTPUT
+                        output directory [default: furaffinity-dl]
   --cookies COOKIES, -c COOKIES
-                        path to a NetScape cookies file
-  --ua UA, -u UA        Your browser's useragent, may be required, depending on your luck
-  --start START, -s START
+                        path to a Netscape cookies file
+  --user-agent [UA], -u [UA]
+                        Your browser's user agent, may be required, depending on your luck
+  --start [START], -s [START]
                         page number to start from
   --stop STOP, -S STOP  Page number to stop on. For favorites pages, specify the full URL after the username (1234567890/next).
   --dont-redownload, -d
-                        Don't redownload files that have already been downloaded
+                        skip already-downloaded files [default: true; passing the flag disables skipping]
   --interval INTERVAL, -i INTERVAL
-                        delay between downloading pages
-  --metadir METADIR, -m METADIR
-                        directory to put meta files in
+                        delay between downloading pages in seconds [default: 0]
+  --rating, -r          separate downloads by rating [default: true; passing the flag disables separation]
+  --metadata, -m        enable downloading of metadata [default: false]

 Examples:
- python3 furaffinity-dl.py gallery koul
+ python3 furaffinity-dl.py koul
  python3 furaffinity-dl.py -o koulsArt --category gallery koul
- python3 furaffinity-dl.py -o mylasFavs favorites mylafox
+ python3 furaffinity-dl.py -o mylasFavs --category favorites mylafox

 You can also log in to FurAffinity in a web browser and load cookies to download Age restricted content or Submissions:
  python3 furaffinity-dl.py -c cookies.txt --category gallery letodoesart
- python3 furaffinity-dl.py -c cookies.txt msg submissions
+ python3 furaffinity-dl.py -c cookies.txt --submissions

 DISCLAIMER: It is your own responsibility to check whether batch downloading is allowed by FurAffinity terms of service and to abide by them.
-
 ```

-You can also log in to download restricted content. To do that, log in to FurAffinity in your web browser, export cookies to a file from your web browser in Netscape format (there are extensions to do that [for Firefox](https://addons.mozilla.org/en-US/firefox/addon/ganbo/) and [for Chrome based browsers](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg)), you can then pass them to the script with the `-c` flag, like this (you may also have to provide your user agent):
+You can also log in to download restricted content. To do that, log in to FurAffinity in your web browser and export cookies to a file in Netscape format (there are extensions for this, [for Firefox](https://addons.mozilla.org/en-US/firefox/addon/ganbo/) and [for Chrome based browsers](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid?hl=en)). You can then pass them to the script with the `-c` flag (you may also have to provide your user agent):

 `python3 furaffinity-dl.py -c cookies.txt -u 'Mozilla/5.0 ....' --category gallery letodoesart`
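For readers who want to see what the script does with that cookies file, here is a minimal standalone sketch of the same cookie handling. It mirrors the `cookielib.MozillaCookieJar` logic in `furaffinity-dl.py` below; the target URL is just an example:

```python
import http.cookiejar as cookielib

import requests

session = requests.session()

# cookies.txt must be in Netscape format, as produced by the browser extensions above
cookies = cookielib.MozillaCookieJar("cookies.txt")
cookies.load()
session.cookies = cookies

# every request made through this session now carries the FurAffinity login
response = session.get("https://www.furaffinity.net/msg/submissions")
print(response.status_code)
```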
diff --git a/furaffinity-dl.py b/furaffinity-dl.py
old mode 100755
new mode 100644
index 26b7e6c..8de10c3
--- a/furaffinity-dl.py
+++ b/furaffinity-dl.py
@@ -1,99 +1,161 @@
 #!/usr/bin/python3
 import argparse
-from types import NoneType
-from tqdm import tqdm
-from argparse import RawTextHelpFormatter
-import json
-from bs4 import BeautifulSoup
-import requests
 import http.cookiejar as cookielib
-import re
+import json
 import os
+import re
 from time import sleep

-'''
-Please refer to LICENSE for licensing conditions.
-'''
+import requests
+from bs4 import BeautifulSoup
+from tqdm import tqdm

 # Argument parsing
-parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter, description='Downloads the entire gallery/scraps/favorites of a furaffinity user, or your submissions', epilog='''
+parser = argparse.ArgumentParser(
+    formatter_class=argparse.RawTextHelpFormatter,
+    description="Downloads the entire gallery/scraps/favorites of a furaffinity user, or your submissions",
+    epilog="""
 Examples:
- python3 furaffinity-dl.py gallery koul
+ python3 furaffinity-dl.py koul
  python3 furaffinity-dl.py -o koulsArt --category gallery koul
- python3 furaffinity-dl.py -o mylasFavs favorites mylafox\n
+ python3 furaffinity-dl.py -o mylasFavs --category favorites mylafox\n

 You can also log in to FurAffinity in a web browser and load cookies to download Age restricted content or Submissions:
  python3 furaffinity-dl.py -c cookies.txt --category gallery letodoesart
- python3 furaffinity-dl.py -c cookies.txt msg submissions\n
+ python3 furaffinity-dl.py -c cookies.txt --submissions\n

 DISCLAIMER: It is your own responsibility to check whether batch downloading is allowed by FurAffinity terms of service and to abide by them.
-''')
-parser.add_argument('category', metavar='category', type=str, nargs='?', default='gallery', help='the category to download, gallery/scraps/favorites')
-parser.add_argument('username', metavar='username', type=str, nargs='?', help='username of the furaffinity user')
-parser.add_argument('folder', metavar='folder', type=str, nargs='?', help='name of the folder (full path, for instance 123456/Folder-Name-Here)')
-parser.add_argument('--output', '-o', dest='output', type=str, default='.', help="output directory")
-parser.add_argument('--cookies', '-c', dest='cookies', type=str, default='', help="path to a NetScape cookies file")
-parser.add_argument('--ua', '-u', dest='ua', type=str, default='Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.7) Gecko/20100101 Firefox/68.7', help="Your browser's useragent, may be required, depending on your luck")
-parser.add_argument('--start', '-s', dest='start', type=str, default=1, help="page number to start from")
-parser.add_argument('--stop', '-S', dest='stop', type=str, default='', help="Page number to stop on. For favorites pages, specify the full URL after the username (1234567890/next).")
-parser.add_argument('--dont-redownload', '-d', const='dont_redownload', action='store_const', help="Don't redownload files that have already been downloaded")
-parser.add_argument('--interval', '-i', dest='interval', type=float, default=0, help="delay between downloading pages")
-parser.add_argument('--metadir', '-m', dest='metadir', type=str, default=None, help="directory to put meta files in")
+""",
+)
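A side note on the `formatter_class` switch above: `argparse.RawTextHelpFormatter` keeps the newlines of `description` and `epilog` exactly as written, which is what lets the multi-line examples render properly in `--help`. A tiny illustration (hypothetical `demo.py`, not part of this patch):

```python
import argparse

parser = argparse.ArgumentParser(
    prog="demo.py",
    formatter_class=argparse.RawTextHelpFormatter,
    epilog="Examples:\n demo.py koul\n demo.py --category favorites mylafox",
)

# prints the epilog with its line breaks intact, then exits
parser.parse_args(["--help"])
```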
+
+# General stuff
+parser.add_argument(
+    "--category",
+    "-ca",
+    type=str,
+    nargs="?",
+    help="the category to download, gallery/scraps/favorites [default: gallery]",
+    const="gallery",  # fall back to the default when the flag is passed without a value
+    default="gallery",
+)
+parser.add_argument(
+    "--submissions",
+    "-su",
+    action="store_true",
+    help="download your submissions",
+)
+parser.add_argument(
+    "username",
+    type=str,
+    help="username of the furaffinity user [required]",
+)
+parser.add_argument(
+    "--folder",
+    type=str,
+    help="full path of the furaffinity folder, for instance 123456/Folder-Name-Here",
+)
+parser.add_argument(
+    "--output", "-o", type=str, default="furaffinity-dl", help="output directory [default: furaffinity-dl]"
+)
+parser.add_argument(
+    "--cookies",
+    "-c",
+    dest="cookies",
+    type=str,
+    help="path to a Netscape cookies file",
+)
+parser.add_argument(
+    "--user-agent",
+    "-u",
+    dest="ua",
+    type=str,
+    nargs="?",
+    default="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.7) Gecko/20100101 Firefox/68.7",
+    help="Your browser's user agent, may be required, depending on your luck",
+)
+parser.add_argument(
+    "--start", "-s", type=str, default="1", help="page number to start from", nargs="?"
+)
+parser.add_argument(
+    "--stop",
+    "-S",
+    dest="stop",
+    type=str,
+    help="Page number to stop on. For favorites pages, specify the full URL after the username (1234567890/next).",
+)
+parser.add_argument(
+    "--dont-redownload",
+    "-d",
+    action="store_false",  # defaults to True; passing the flag turns skipping off
+    help="skip already-downloaded files [default: true; passing the flag disables skipping]",
+)
+parser.add_argument(
+    "--interval",
+    "-i",
+    dest="interval",
+    type=float,
+    default=0,
+    help="delay between downloading pages in seconds [default: 0]",
+)
+parser.add_argument(
+    "--rating",
+    "-r",
+    action="store_false",  # defaults to True; passing the flag disables separation
+    help="separate downloads by rating [default: true; passing the flag disables separation]",
+)
+parser.add_argument(
+    "--metadata",
+    "-m",
+    action="store_true",
+    help="enable downloading of metadata [default: false]",
+)
 args = parser.parse_args()
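One subtlety worth spelling out: `--rating` and `--dont-redownload` are declared with `action="store_false"`, so both destinations default to `True` and passing the flag switches the behaviour *off*. A quick standalone check of those semantics (flag name reused for illustration only):

```python
import argparse

p = argparse.ArgumentParser()
p.add_argument("--rating", "-r", action="store_false")

print(p.parse_args([]).rating)      # True  -> rating separation is on by default
print(p.parse_args(["-r"]).rating)  # False -> the flag turns separation off
```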
+
+BASE_URL = "https://www.furaffinity.net"
+categories = {
+    "gallery": "gallery",
+    "scraps": "scraps",
+    "favorites": "favorites",
+}
+category = categories.get(args.category)
+if category is None:
+    print("please enter a valid category")
+    exit()
 if args.username is None:
-    parser.print_help()
+    print("please enter an FA username")
+    exit()
+if args.output is None:
+    print("please enter an output folder")
     exit()

-# Create output directory if it doesn't exist
-if args.output != '.':
-    os.makedirs(args.output, exist_ok=True)
-
-if args.metadir == None:
-    args.metadir = args.output
-else:
-    os.makedirs(args.metadir, exist_ok=True)
+username = args.username
+output = f"{args.output}/{args.username}"
+metadata = f"{output}/metadata"
+filter = {"YCH Open", "Reminder", "YCH Closed", "Auction"}  # unused: title filtering is done by the regex in download()

-# Check validity of category
-valid_categories = ['gallery', 'favorites', 'scraps', 'msg']
-if args.category not in valid_categories:
-    raise Exception('Category is not valid', args.category)
-
-# Check validity of username
-if bool(re.compile(r'[^a-zA-Z0-9\-~._]').search(args.username)):
-    raise Exception('Username contains non-valid characters', args.username)
-
-# Initialise a session
 session = requests.session()
-session.headers.update({'User-Agent': args.ua})
+session.headers.update({"User-Agent": args.ua})

-# Load cookies from a netscape cookie file (if provided)
-if args.cookies != '':
+if args.cookies is not None:
     cookies = cookielib.MozillaCookieJar(args.cookies)
     cookies.load()
     session.cookies = cookies

-base_url = 'https://www.furaffinity.net'
-gallery_url = '{}/{}/{}'.format(base_url, args.category, args.username)
-if args.folder is not None:
-    gallery_url += "/folder/"
-    gallery_url += args.folder
-page_num = args.start
-

 def download_file(url, fname, desc):
     r = session.get(url, stream=True)
     if r.status_code != 200:
-        print("Got a HTTP {} while downloading {}; skipping".format(r.status_code, fname))
+        print(f"Got an HTTP {r.status_code} while downloading {fname}; skipping")
         return False

-    total = int(r.headers.get('Content-Length', 0))
-    with open(fname, 'wb') as file, tqdm(
+    total = int(r.headers.get("Content-Length", 0))
+    with open(fname, "wb") as file, tqdm(
         desc=desc.ljust(40)[:40],
         total=total,
         miniters=100,
-        unit='b',
+        unit="b",
         unit_scale=True,
-        unit_divisor=1024
+        unit_divisor=1024,
     ) as bar:
         for data in r.iter_content(chunk_size=1024):
             size = file.write(data)
@@ -101,161 +163,212 @@ def download_file(url, fname, desc):
     return True

-
-# The cursed function that handles downloading
+download_url = f"{BASE_URL}/{category}/{username}"
+if args.folder is not None:
+    download_url = f"{BASE_URL}/gallery/{username}/folder/{args.folder}"
+if args.submissions is True:
+    download_url = f"{BASE_URL}/msg/submissions"
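Spelled out, these are the three URL shapes the block above can produce, with the later assignments taking precedence. A sketch with a hypothetical helper (usernames and folder values are illustrative):

```python
BASE_URL = "https://www.furaffinity.net"


def build_download_url(username, category="gallery", folder=None, submissions=False):
    """Hypothetical helper mirroring the precedence above: --folder overrides
    --category, and --submissions overrides both (it ignores the username)."""
    url = f"{BASE_URL}/{category}/{username}"
    if folder is not None:
        url = f"{BASE_URL}/gallery/{username}/folder/{folder}"
    if submissions:
        url = f"{BASE_URL}/msg/submissions"
    return url


print(build_download_url("koul"))
# https://www.furaffinity.net/gallery/koul
print(build_download_url("koul", folder="123456/Folder-Name-Here"))
# https://www.furaffinity.net/gallery/koul/folder/123456/Folder-Name-Here
print(build_download_url("koul", submissions=True))
# https://www.furaffinity.net/msg/submissions
```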
 def download(path):
-    page_url = '{}{}'.format(base_url, path)
-    response = session.get(page_url)
-    s = BeautifulSoup(response.text, 'html.parser')
+    response = session.get(f"{BASE_URL}{path}")
+    s = BeautifulSoup(response.text, "html.parser")

     # System messages
-    if s.find(class_='notice-message') is not None:
-        message = s.find(class_='notice-message').find('div').find(class_="link-override").text.strip()
-        raise Exception('System Message', message)
+    if s.find(class_="notice-message") is not None:
+        message = (
+            s.find(class_="notice-message")
+            .find("div")
+            .find(class_="link-override")
+            .text.strip()
+        )
+        raise Exception("System Message", message)

-    image = s.find(class_='download').find('a').attrs.get('href')
-    title = s.find(class_='submission-title').find('p').contents[0]
+    image = s.find(class_="download").find("a").attrs.get("href")
+    title = s.find(class_="submission-title").find("p").contents[0]
     filename = image.split("/")[-1:][0]
     data = {
-        'id': int(path.split('/')[-2:-1][0]),
-        'filename': filename,
-        'author': s.find(class_='submission-id-sub-container').find('a').find('strong').text,
-        'date': s.find(class_='popup_date').attrs.get('title'),
-        'title': title,
-        'description': s.find(class_='submission-description').text.strip().replace('\r\n', '\n'),
+        "id": int(path.split("/")[-2:-1][0]),
+        "filename": filename,
+        "author": s.find(class_="submission-id-sub-container")
+        .find("a")
+        .find("strong")
+        .text,
+        "date": s.find(class_="popup_date").attrs.get("title"),
+        "title": title,
+        "description": s.find(class_="submission-description")
+        .text.strip()
+        .replace("\r\n", "\n"),
         "tags": [],
-        'category': s.find(class_='info').find(class_='category-name').text,
-        'type': s.find(class_='info').find(class_='type-name').text,
-        'species': s.find(class_='info').findAll('div')[2].find('span').text,
-        'gender': s.find(class_='info').findAll('div')[3].find('span').text,
-        'views': int(s.find(class_='views').find(class_='font-large').text),
-        'favorites': int(s.find(class_='favorites').find(class_='font-large').text),
-        'rating': s.find(class_='rating-box').text.strip(),
-        'comments': []
+        "category": s.find(class_="info").find(class_="category-name").text,
+        "type": s.find(class_="info").find(class_="type-name").text,
+        "species": s.find(class_="info").findAll("div")[2].find("span").text,
+        "gender": s.find(class_="info").findAll("div")[3].find("span").text,
+        "views": int(s.find(class_="views").find(class_="font-large").text),
+        "favorites": int(s.find(class_="favorites").find(class_="font-large").text),
+        "rating": s.find(class_="rating-box").text.strip(),
+        "comments": [],
     }

+    # Filter out non-content posts (YCH ads, reminders, auctions, adopts, pre-orders).
+    # re.search() already scans the whole title, so checking the match alone is enough.
+    match = re.search(
+        "reminder|auction|YCH Open|YCH Closed|Adopt|pre-order", title, re.IGNORECASE
+    )
+    if match is not None:
+        print(f'post "{title}" was filtered and will not be downloaded')
+        return True
+
     # Extract tags
     try:
-        for tag in s.find(class_='tags-row').findAll(class_='tags'):
-            data['tags'].append(tag.find('a').text)
-    except:
-        pass
+        for tag in s.find(class_="tags-row").findAll(class_="tags"):
+            data["tags"].append(tag.find("a").text)
+    except AttributeError:
+        print(f'post "{title}" has no tags')

-    # Extract comments
-    for comment in s.findAll(class_='comment_container'):
-        temp_ele = comment.find(class_='comment-parent')
-        parent_cid = None if temp_ele is None else int(temp_ele.attrs.get('href')[5:])
+    image_url = f"https:{image}"
+    os.makedirs(output, exist_ok=True)
+    output_path = f"{output}/{filename}"
+    if args.rating is True:
+        os.makedirs(f'{output}/{data.get("rating")}', exist_ok=True)
+        output_path = f'{output}/{data.get("rating")}/{filename}'
+    if args.folder is not None:
+        os.makedirs(f"{output}/{args.folder}", exist_ok=True)
+        output_path = f"{output}/{args.folder}/{filename}"
+        if args.rating is True:
+            os.makedirs(f'{output}/{args.folder}/{data.get("rating")}', exist_ok=True)
+            output_path = f'{output}/{args.folder}/{data.get("rating")}/{filename}'
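With the defaults (`--output furaffinity-dl`, rating separation on) and a `--folder` like `123456/Folder-Name-Here`, the directory tree this block builds looks roughly as follows — all file and folder names here are illustrative:

```
furaffinity-dl/
└── koul/
    ├── General/
    │   └── 12345678.koul_artwork.png
    ├── Mature/
    │   └── ...
    └── 123456/
        └── Folder-Name-Here/
            └── General/
                └── 23456789.koul_other.png
```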
-        # Comment is deleted or hidden
-        if comment.find(class_='comment-link') is None:
-            continue
-
-        data['comments'].append({
-            'cid': int(comment.find(class_='comment-link').attrs.get('href')[5:]),
-            'parent_cid': parent_cid,
-            'content': comment.find(class_='comment_text').contents[0].strip(),
-            'username': comment.find(class_='comment_username').text,
-            'date': comment.find(class_='popup_date').attrs.get('title')
-        })
-
-    url = 'https:{}'.format(image)
-    output_path = os.path.join(args.output, filename)
-
-    if not args.dont_redownload or not os.path.isfile(output_path):
-        if not download_file(url, output_path, data["title"]):
-            return False
+    if args.dont_redownload is True and os.path.isfile(output_path):
+        print(f'Skipping "{title}", since it\'s already downloaded')
     else:
-        print('Skipping "{}", since it\'s already downloaded'.format(data["title"]))
-        return True
+        download_file(image_url, output_path, title)

-    # Write a UTF-8 encoded JSON file for metadata
-    with open(os.path.join(args.metadir, '{}.json'.format(filename)), 'w', encoding='utf-8') as f:
-        json.dump(data, f, ensure_ascii=False, indent=4)
+    if args.metadata is True:
+        # Extract comments
+        for comment in s.findAll(class_="comment_container"):
+            temp_ele = comment.find(class_="comment-parent")
+            parent_cid = (
+                None if temp_ele is None else int(temp_ele.attrs.get("href")[5:])
+            )
+            # Comment is deleted or hidden
+            if comment.find(class_="comment-link") is None:
+                continue
+
+            data["comments"].append(
+                {
+                    "cid": int(
+                        comment.find(class_="comment-link").attrs.get("href")[5:]
+                    ),
+                    "parent_cid": parent_cid,
+                    "content": comment.find(class_="comment_text").contents[0].strip(),
+                    "username": comment.find(class_="comment_username").text,
+                    "date": comment.find(class_="popup_date").attrs.get("title"),
+                }
+            )
+
+        # Write a UTF-8 encoded JSON file for metadata
+        os.makedirs(metadata, exist_ok=True)
+        with open(
+            os.path.join(metadata, f"{filename}.json"), "w", encoding="utf-8"
+        ) as f:
+            json.dump(data, f, ensure_ascii=False, indent=4)
     return True
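For reference, with `--metadata` enabled the JSON written above lands in `<output>/<username>/metadata/<filename>.json`. Its keys come straight from the `data` dict; the values here are illustrative:

```json
{
    "id": 12345678,
    "filename": "12345678.koul_artwork.png",
    "author": "koul",
    "date": "Jun 16, 2022 06:41 AM",
    "title": "Some Artwork",
    "description": "A short description.",
    "tags": ["fox", "digital"],
    "category": "Artwork (Digital)",
    "type": "General Furry Art",
    "species": "Fox",
    "gender": "Male",
    "views": 120,
    "favorites": 15,
    "rating": "General",
    "comments": [
        {
            "cid": 165890000,
            "parent_cid": null,
            "content": "Great work!",
            "username": "someone",
            "date": "Jun 16, 2022 07:00 AM"
        }
    ]
}
```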

-global i
-i = 1

 # Main downloading loop
-while True:
-
-    if args.stop and args.stop == page_num:
-        print(f"Reached page {args.stop}, stopping.")
-        break
-
-    page_url = '{}/{}'.format(gallery_url, page_num)
-    s = BeautifulSoup(response.text, 'html.parser')
-
-    # Account status
-    if page_num == 1:
-        if s.find(class_='loggedin_user_avatar') is not None:
-            account_username = s.find(class_='loggedin_user_avatar').attrs.get('alt')
-            print('Logged in as', account_username)
-        else:
-            print('Not logged in, NSFW content is inaccessible')
-
-    # System messages
-    if s.find(class_='notice-message') is not None:
-        message = s.find(class_='notice-message').find('div').find(class_="link-override").text.strip()
-        raise Exception('System Message', message)
-
-    # End of gallery
-    if s.find(id='no-images') is not None:
-        print('End of gallery')
-        break
-
-    # Download all images on the page
-    for img in s.findAll('figure'):
-        download(img.find('a').attrs.get('href'))
-        sleep(args.interval)
-
-    if args.category == "msg":
-        next_button = s.find('a', class_='button standard more', text="Next 48")
-
-        if next_button is None or next_button.parent is None:
-            next_button = s.find('a', class_='button standard more-half', text="Next 48")
-            if next_button is None or next_button.parent is None:
-                print('Unable to find next button')
-                break
-
-        next_page_link = next_button.attrs['href']
-
-        i = i + 1
-        page_num = next_page_link.split('/')[-2]
-        page_url = base_url + next_page_link
-
-        print('Downloading page', i, page_url)
-    elif args.category != "favorites":
-        next_button = s.find('button', class_='button standard', text="Next")
-        if next_button is None or next_button.parent is None:
-            print('Unable to find next button')
-            break
-
-        page_num = next_button.parent.attrs['action'].split('/')[-2]
-
-        print('Downloading page', page_num, page_url)
-    else:
-        next_button = s.find('a', class_='button mobile-button right', text="Next")
-        if next_button is None:
-            print('Unable to find next button')
-            break
-
-        # unlike galleries that are sequentially numbered, favorites use a different scheme.
-        # the "page_num" is instead: [set of numbers]/next (the trailing /next is required)
-
-        next_page_link = next_button.attrs['href']
-        next_fav_num = re.search(r'\d+', next_page_link)
-
-        if next_fav_num == None:
-            print('Failed to parse next favorite link.')
-            break
-
-        page_num = next_fav_num.group(0) + "/next"
-
-        # parse it into numbers/next
-
-        print('Downloading page', page_num, page_url)
-
-
-print('Finished downloading')
+def main():
+    page_end = args.stop
+    page_num = args.start
+    page_url = f"{download_url}/{page_num}"
     response = session.get(page_url)
+    s = BeautifulSoup(response.text, "html.parser")
+    if s.find(class_="loggedin_user_avatar") is not None:
+        account_username = s.find(class_="loggedin_user_avatar").attrs.get("alt")
+        print("Logged in as", account_username)
+    else:
+        print("Not logged in, NSFW content is inaccessible")
+
+    while True:
+        if page_end == page_num:
+            print(f"Reached page {page_end}, stopping.")
+            break
+
+        page_url = f"{download_url}/{page_num}"
+        response = session.get(page_url)
+        s = BeautifulSoup(response.text, "html.parser")
+
+        # System messages
+        if s.find(class_="notice-message") is not None:
+            try:
+                message = (
+                    s.find(class_="notice-message")
+                    .find("div")
+                    .find(class_="link-override")
+                    .text.strip()
+                )
+            except AttributeError:
+                print("You didn't provide cookies to log in")
+                break
+            raise Exception("System Message", message)

+        # End of gallery
+        if s.find(id="no-images") is not None:
+            print("End of gallery")
+            break
+
+        # Download all images on the page
+        for img in s.findAll("figure"):
+            download(img.find("a").attrs.get("href"))
+            sleep(args.interval)
+
+        if args.submissions is True:
+            next_button = s.find("a", class_="button standard more", text="Next 48")
+            if next_button is None or next_button.parent is None:
+                next_button = s.find(
+                    "a", class_="button standard more-half", text="Next 48"
+                )
+                if next_button is None or next_button.parent is None:
+                    print("Unable to find next button")
+                    break
+
+            next_page_link = next_button.attrs["href"]
+            page_num = next_page_link.split("/")[-2]
+            page_url = BASE_URL + next_page_link
+
+            print("Downloading page", page_num, page_url)
+        elif args.category != "favorites":
+            next_button = s.find("button", class_="button standard", text="Next")
+            if next_button is None or next_button.parent is None:
+                print("Unable to find next button")
+                break
+
+            page_num = next_button.parent.attrs["action"].split("/")[-2]
+
+            print("Downloading page", page_num, page_url)
+        else:
+            next_button = s.find("a", class_="button mobile-button right", text="Next")
+            if next_button is None:
+                print("Unable to find next button")
+                break
+
+            # Unlike galleries, which are numbered sequentially, favorites use a different
+            # scheme: "page_num" is [set of numbers]/next (the trailing /next is required).
+            next_page_link = next_button.attrs["href"]
+            next_fav_num = re.search(r"\d+", next_page_link)
+
+            if next_fav_num is None:
+                print("Failed to parse next favorite link.")
+                break
+
+            page_num = next_fav_num.group(0) + "/next"
+
+            print("Downloading page", page_num, page_url)
+
+    print("Finished downloading")
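To make the favorites pagination concrete: the mobile `Next` button's `href` contains the id of the next favorites set, the regex pulls out that number, and the code re-appends the required `/next` suffix (values illustrative):

```python
import re

# href taken from the favorites page's "Next" button
next_page_link = "/favorites/mylafox/1234567890/next"

next_fav_num = re.search(r"\d+", next_page_link)
page_num = next_fav_num.group(0) + "/next"

print(page_num)  # 1234567890/next -> appended to the /favorites/<user>/ URL
```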
+
+
+if __name__ == "__main__":
+    main()