diff --git a/README.md b/README.md index d64c55c..22c205f 100644 --- a/README.md +++ b/README.md @@ -1,38 +1,44 @@ This branch is the development version of furaffinity-dl rewritten in python. # FurAffinity Downloader -**furaffinity-dl** was a bash script (now in python) for batch downloading of galleries (and scraps/favourites) from furaffinity users. +**furaffinity-dl** is a python script for batch downloading of galleries (and scraps/favourites) from furaffinity users. It was written for preservation of culture, to counter the people nuking their galleries every once a while. -Supports all known submission types: images, texts and audio. +Supports all known submission types: images, text, flash and audio. ## Requirements -Exact requirements are unknown since its still in development, but you should only need `beautifulsoup4` to be installed (`pip3 install beautifulsoup4`). I will put a `requirements.txt` file in the repo soon. + +`pip3 install -r requirements.txt` **The script currently only works with the "Modern" theme** -furaffinity-dl has only been tested only on Linux, however it should also work on Mac, Windows and any other platform that supports python. +furaffinity-dl has only been tested on Linux, however it should also work on Mac, Windows or any other platform that supports python. ## Usage -Run it with - `./furaffinity-dl.py category username` + +Run it with: + +`./furaffinity-dl.py category username` + or: - `python3 furaffinity-dl.py category username` + +`python3 furaffinity-dl.py category username` All files from the given section and user will be downloaded to the current directory. 
### Examples - `python3 furaffinity-dl.py gallery koul` - `python3 furaffinity-dl.py -o koulsArt gallery koul` +`python3 furaffinity-dl.py gallery koul` - `python3 furaffinity-dl.py -o mylasFavs favorites mylafox` +`python3 furaffinity-dl.py -o koulsArt gallery koul` + +`python3 furaffinity-dl.py -o mylasFavs favorites mylafox` For a full list of command line arguments use `./furaffinity-dl -h`. -You can also log in to download restricted content. To do that, log in to FurAffinity in your web browser, export cookies to a file from your web browser in Netscape format (there are extensions to do that [for Firefox](https://addons.mozilla.org/en-US/firefox/addon/ganbo/) and [for Chrome base browsers](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg)), you can then pass them to the script with the `-c` flag, like this (you may also have to provide your user-agent): +You can also log in to download restricted content. To do that, log in to FurAffinity in your web browser, export cookies to a file from your web browser in Netscape format (there are extensions to do that [for Firefox](https://addons.mozilla.org/en-US/firefox/addon/ganbo/) and [for Chrome based browsers](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg)), you can then pass them to the script with the `-c` flag, like this (you may also have to provide your user agent): - `python3 furaffinity-dl.py -c cookies.txt -u 'Mozilla/5.0 ....' gallery letodoesart` +`python3 furaffinity-dl.py -c cookies.txt -u 'Mozilla/5.0 ....' gallery letodoesart` ## TODO @@ -41,4 +47,5 @@ You can also log in to download restricted content. To do that, log in to FurAff - Login without having to export cookies ## Disclaimer + It is your own responsibility to check whether batch downloading is allowed by FurAffinity's terms of service and to abide by them. For further disclaimers see LICENSE. 
diff --git a/furaffinity-dl.py b/furaffinity-dl.py index 491b6ab..2226c4e 100755 --- a/furaffinity-dl.py +++ b/furaffinity-dl.py @@ -1,12 +1,11 @@ #!/usr/bin/python3 import argparse +from tqdm import tqdm from argparse import RawTextHelpFormatter import json from bs4 import BeautifulSoup import requests -import urllib.request import http.cookiejar as cookielib -import urllib.parse import re import os @@ -19,7 +18,6 @@ current ideas / things to do: metadata injection (gets messy easily) sqlite database support for classic theme - using `requests` instead of `urllib` turn this into a module ''' @@ -61,7 +59,7 @@ if bool(re.compile(r'[^a-zA-Z0-9\-~._]').search(args.username)): raise Exception('Username contains non-valid characters', args.username) # Initialise a session -session = requests.Session() +session = requests.session() session.headers.update({'User-Agent': args.ua}) # Load cookies from a netscape cookie file (if provided) @@ -74,9 +72,28 @@ base_url = 'https://www.furaffinity.net' gallery_url = '{}/{}/{}'.format(base_url, args.category, args.username) page_num = args.start +def download_file(url, fname, desc): + r = session.get(url, stream=True) + if r.status_code != 200: + print("Got a HTTP {} while downloading; skipping".format(r.status_code)) + return False + + total = int(r.headers.get('Content-Length', 0)) + with open(fname, 'wb') as file, tqdm( + desc=desc.ljust(40)[:40], + total=total, + miniters=100, + unit='b', + unit_scale=True, + unit_divisor=1024 + ) as bar: + for data in r.iter_content(chunk_size=1024): + size = file.write(data) + bar.update(size) + return True # The cursed function that handles downloading -def download_file(path): +def download(path): page_url = '{}{}'.format(base_url, path) response = session.get(page_url) s = BeautifulSoup(response.text, 'html.parser') @@ -111,7 +128,7 @@ def download_file(path): temp_ele = comment.find(class_='comment-parent') parent_cid = None if temp_ele is None else int(temp_ele.attrs.get('href')[5:]) - 
# Comment deleted or hidden + # Comment is deleted or hidden if comment.find(class_='comment-link') is None: continue @@ -123,21 +140,11 @@ def download_file(path): 'date': comment.find(class_='popup_date').attrs.get('title') }) - print('Downloading "{}"... '.format(title)) - - # Because for some god forsaken reason FA keeps the original filename in the upload, in the case that it contains non-ASCII - # characters it can make this thing blow up. So we have to do some annoying IRI stuff to make it work. Maybe consider `requests` - # instead of `urllib` - def strip_non_ascii(s): return ''.join(i for i in s if ord(i) < 128) - url = 'https:{}'.format(image) - url = urllib.parse.urlsplit(url) - url = list(url) - url[2] = urllib.parse.quote(url[2]) - url = urllib.parse.urlunsplit(url) - try: - urllib.request.urlretrieve(url, os.path.join(args.output, strip_non_ascii(filename))) - except urllib.error.HTTPError: - print("404 Not Found, skipping") + url ='https:{}'.format(image) + output_path = os.path.join(args.output, filename) + + if not download_file(url, output_path, data["title"]): + return False # Write a UTF-8 encoded JSON file for metadata with open(os.path.join(args.output, '{}.json'.format(filename)), 'w', encoding='utf-8') as f: @@ -156,7 +163,7 @@ while True: account_username = s.find(class_='loggedin_user_avatar').attrs.get('alt') print('Logged in as', account_username) else: - print('Not logged in, some users gallery\'s may be unaccessible and NSFW content is not downloadable') + print('Not logged in, NSFW content is unaccessible') # System messages if s.find(class_='notice-message') is not None: @@ -174,7 +181,7 @@ while True: # Download all images on the page for img in s.findAll('figure'): - download_file(img.find('a').attrs.get('href')) + download(img.find('a').attrs.get('href')) page_num += 1 print('Downloading page', page_num) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..952bbfd --- /dev/null +++ 
b/requirements.txt @@ -0,0 +1,3 @@ +beautifulsoup4 +requests +tqdm