.*
' "$tempfile" | awk -F "" '{print $2}' | awk -F "
" '{print $1}')" - fi - - file_type="${image_url##*.}" - file_name="$(echo "$image_url" | cut -d "/" -f 7)" - if [[ "$file_name" =~ ^[0-9]{0,12}$ ]]; then - file_name="$(echo "$image_url" | cut -d "/" -f 8)" - fi - - # Choose the output path - if [ $rename = true ]; then - # FIXME titles that are just a single emoji get changed to " " and overwrite eachother - file="$outdir/$(echo "$title" | sed -e 's/[^A-Za-z0-9._-]/ /g').$file_type" - else - file="$outdir/$file_name" - fi - - # Download the image - if [ ! -f "$file" ] || [ $overwrite = true ] ; then - wget --quiet --show-progress "$image_url" -O "$file" - else - echo "File already exists, skipping. Use -w to skip this check" - fi - - mime_type="$(file -- "$file")" - - if [ $textmeta = true ]; then - echo -ne "Title: $title\nURL: $page\nFilename: $file_name\nDescription: $description" > "$file.meta" - fi - - # Add metadata - if [[ $mime_type == *"audio"* ]]; then - # Use eyeD3 for injecting metadata into audio files (if it's installed) - if [ $eyed3 = true ] && [ $metadata = true ]; then - if [ -z "$description" ]; then - eyeD3 -t "$title" -- "$file" || true - else - # HACK: eyeD3 throws an error if a description containing a ":" - eyeD3 -t "$title" --add-comment "${description//:/\\:}" -- "$file" || true - fi - fi - elif [[ $mime_type == *"image"* ]]; then - # Use exiftool for injecting metadata into pictures (if it's installed) - if [ $exiftool = true ] && [ $metadata = true ]; then - cat -- "$file" | exiftool -description="$description" -title="$title" -overwrite_original - > "$tempfile" && mv -- "$tempfile" "$file" || true - fi - fi - - # If there is a file download limit then keep track of it - if [ "$maxsavefiles" -ne "0" ]; then - download_count="$((download_count + 1))" - - if [ "$download_count" -ge "$maxsavefiles" ]; then - echo "Reached set file download limit." 
#!/usr/bin/python3
'''
furaffinity-dl: downloads the entire gallery/scraps/favorites of a
FurAffinity user, saving each submission image plus a per-submission
UTF-8 JSON metadata file.

Please refer to LICENSE for licensing conditions.

current ideas / things to do:
    -r replenish, keep downloading until it finds a already downloaded file
    -n number of posts to download
    file renaming to title
    metadata injection (gets messy easily)
    sqlite database
    support for beta theme
    using `requests` instead of `urllib`
    turn this into a module
'''
import argparse
from argparse import RawTextHelpFormatter
import http.cookiejar as cookielib
import json
import os
import re
import sys
import urllib.parse
import urllib.request

import requests
from bs4 import BeautifulSoup

# Argument parsing
parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter, description='Downloads the entire gallery/scraps/favorites of a furaffinity user', epilog='''
Examples:
 python3 fadl.py gallery koul
 python3 fadl.py -o koulsArt gallery koul
 python3 fadl.py -o mylasFavs favorites mylafox\n
You can also log in to FurAffinity in a web browser and load cookies to download restricted content:
 python3 fadl.py -c cookies.txt gallery letodoesart\n
DISCLAIMER: It is your own responsibility to check whether batch downloading is allowed by FurAffinity terms of service and to abide by them.
''')
parser.add_argument('category', metavar='category', type=str, nargs='?', default='gallery',
                    help='the category to download, gallery/scraps/favorites')
parser.add_argument('username', metavar='username', type=str, nargs='?',
                    help='username of the furaffinity user')
parser.add_argument('-o', metavar='output', dest='output', type=str, default='.', help="output directory")
parser.add_argument('-c', metavar='cookies', dest='cookies', type=str, default='', help="path to a NetScape cookies file")
parser.add_argument('-s', metavar='start', dest='start', type=int, default=1, help="page number to start from")

args = parser.parse_args()
if args.username is None:  # was `== None`; identity comparison is the idiom for None
    parser.print_help()
    sys.exit()

# Create output directory if it doesn't exist
if args.output != '.':
    os.makedirs(args.output, exist_ok=True)

# Check validity of category
valid_categories = ['gallery', 'favorites', 'scraps']
if args.category not in valid_categories:
    raise Exception('Category is not valid', args.category)

# Check validity of username (reject anything outside FA's allowed character set)
if re.search(r'[^a-zA-Z0-9\-~._]', args.username):
    raise Exception('Username contains non-valid characters', args.username)

# Initialise a session
session = requests.Session()
session.headers.update({'User-Agent': 'furaffinity-dl redevelopment'})

# Load cookies from a netscape cookie file (if provided)
if args.cookies != '':
    cookies = cookielib.MozillaCookieJar(args.cookies)
    cookies.load()
    session.cookies = cookies

base_url = 'https://www.furaffinity.net'
# BUG FIX: the original hardcoded '/gallery/' here, so the validated
# `category` argument (scraps/favorites) was silently ignored.
gallery_url = '{}/{}/{}'.format(base_url, args.category, args.username)
page_num = args.start


def strip_non_ascii(s):
    '''Return `s` with every non-ASCII character removed (for safe local filenames).'''
    return ''.join(i for i in s if ord(i) < 128)


# The cursed function that handles downloading
def download_file(path):
    '''Download one submission.

    `path` is the site-relative submission path (e.g. '/view/12345678/').
    Scrapes the submission page for the full-resolution image and its
    metadata (author, date, tags, comments, ...), writes
    '<filename>.json' into the output directory, then downloads the
    image itself next to it.
    '''
    page_url = '{}{}'.format(base_url, path)
    response = session.get(page_url)
    s = BeautifulSoup(response.text, 'html.parser')

    image = s.find(class_='download').find('a').attrs.get('href')
    title = s.find(class_='submission-title').find('p').contents[0]
    filename = image.split('/')[-1]  # was image.split("/")[-1:][0]
    data = {
        'id': int(path.split('/')[-2]),  # was path.split('/')[-2:-1][0]
        'filename': filename,
        'author': s.find(class_='submission-id-sub-container').find('a').find('strong').text,
        'date': s.find(class_='popup_date').attrs.get('title'),
        'title': title,
        'description': s.find(class_='submission-description').text.strip().replace('\r\n', '\n'),
        'tags': [],
        'views': int(s.find(class_='views').find(class_='font-large').text),
        'favorites': int(s.find(class_='favorites').find(class_='font-large').text),
        'rating': s.find(class_='rating-box').text.strip(),
        'comments': []
    }

    # Extract tags
    for tag in s.find(class_='tags-row').findAll(class_='tags'):
        data['tags'].append(tag.find('a').text)

    # Extract comments
    for comment in s.findAll(class_='comment_container'):
        temp_ele = comment.find(class_='comment-parent')
        # href looks like '#cid:NNNN'; strip the 5-char prefix to get the id
        parent_cid = None if temp_ele is None else int(temp_ele.attrs.get('href')[5:])

        # Comment deleted or hidden
        if comment.find(class_='comment-link') is None:
            continue

        data['comments'].append({
            'cid': int(comment.find(class_='comment-link').attrs.get('href')[5:]),
            'parent_cid': parent_cid,
            'content': comment.find(class_='comment_text').contents[0].strip(),
            'username': comment.find(class_='comment_username').text,
            'date': comment.find(class_='popup_date').attrs.get('title')
        })

    # Write a UTF-8 encoded JSON file for metadata
    with open(os.path.join(args.output, '{}.json'.format(filename)), 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

    print('Downloading "{}"... '.format(title))

    # Because for some god forsaken reason FA keeps the original filename in
    # the upload, in the case that it contains non-ASCII characters it can
    # make this thing blow up. So we percent-encode the path component
    # (IRI -> URI) before handing it to urllib. Maybe consider `requests`
    # instead of urllib.
    url = 'https:{}'.format(image)
    parts = list(urllib.parse.urlsplit(url))
    parts[2] = urllib.parse.quote(parts[2])
    url = urllib.parse.urlunsplit(parts)
    urllib.request.urlretrieve(url, os.path.join(args.output, strip_non_ascii(filename)))


# Main downloading loop
while True:
    page_url = '{}/{}'.format(gallery_url, page_num)
    response = session.get(page_url)
    s = BeautifulSoup(response.text, 'html.parser')

    # Account status (only worth reporting once, on the first page)
    if page_num == 1:
        if s.find(class_='loggedin_user_avatar') is not None:
            account_username = s.find(class_='loggedin_user_avatar').attrs.get('alt')
            print('Logged in as', account_username)
        else:
            print('Not logged in, some users gallery\'s may be unaccessible and NSFW content is not downloadable')

    # System messages (e.g. account required / user not found) abort the run
    if s.find(class_='notice-message') is not None:
        message = s.find(class_='notice-message').find('div')
        for ele in message:
            if ele.name is not None:
                ele.decompose()  # drop nested markup, keep only the message text

        raise Exception('System Message', message.text.strip())

    # End of gallery
    if s.find(id='no-images') is not None:
        print('End of gallery')
        break

    # Download all images on the page
    for img in s.findAll('figure'):
        download_file(img.find('a').attrs.get('href'))

    page_num += 1
    print('Downloading page', page_num)

print('Finished downloading')