mirror of
https://github.com/Radiquum/furaffinity-dl.git
synced 2025-04-05 07:44:37 +00:00
Switch to requests
, tidy up README, add requirements.txt
This commit is contained in:
parent
c87a1f5355
commit
071e8692ad
3 changed files with 52 additions and 35 deletions
31
README.md
31
README.md
|
@ -1,38 +1,44 @@
|
|||
This branch is the development version of furaffinity-dl rewritten in python.
|
||||
|
||||
# FurAffinity Downloader
|
||||
**furaffinity-dl** was a bash script (now in python) for batch downloading of galleries (and scraps/favourites) from furaffinity users.
|
||||
**furaffinity-dl** is a python script for batch downloading of galleries (and scraps/favourites) from furaffinity users.
|
||||
It was written for preservation of culture, to counter the people nuking their galleries every once in a while.
|
||||
|
||||
Supports all known submission types: images, texts and audio.
|
||||
Supports all known submission types: images, text, flash and audio.
|
||||
|
||||
## Requirements
|
||||
Exact requirements are unknown since it's still in development, but you should only need `beautifulsoup4` to be installed (`pip3 install beautifulsoup4`). I will put a `requirements.txt` file in the repo soon.
|
||||
|
||||
`pip3 install -r requirements.txt`
|
||||
|
||||
**The script currently only works with the "Modern" theme**
|
||||
|
||||
furaffinity-dl has only been tested only on Linux, however it should also work on Mac, Windows and any other platform that supports python.
|
||||
furaffinity-dl has only been tested on Linux, however it should also work on Mac, Windows or any other platform that supports python.
|
||||
|
||||
## Usage
|
||||
Run it with
|
||||
`./furaffinity-dl.py category username`
|
||||
|
||||
Run it with:
|
||||
|
||||
`./furaffinity-dl.py category username`
|
||||
|
||||
or:
|
||||
`python3 furaffinity-dl.py category username`
|
||||
|
||||
`python3 furaffinity-dl.py category username`
|
||||
|
||||
All files from the given section and user will be downloaded to the current directory.
|
||||
|
||||
### Examples
|
||||
`python3 furaffinity-dl.py gallery koul`
|
||||
|
||||
`python3 furaffinity-dl.py -o koulsArt gallery koul`
|
||||
`python3 furaffinity-dl.py gallery koul`
|
||||
|
||||
`python3 furaffinity-dl.py -o mylasFavs favorites mylafox`
|
||||
`python3 furaffinity-dl.py -o koulsArt gallery koul`
|
||||
|
||||
`python3 furaffinity-dl.py -o mylasFavs favorites mylafox`
|
||||
|
||||
For a full list of command line arguments use `./furaffinity-dl -h`.
|
||||
|
||||
You can also log in to download restricted content. To do that, log in to FurAffinity in your web browser, export cookies to a file from your web browser in Netscape format (there are extensions to do that [for Firefox](https://addons.mozilla.org/en-US/firefox/addon/ganbo/) and [for Chrome base browsers](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg)), you can then pass them to the script with the `-c` flag, like this (you may also have to provide your user-agent):
|
||||
You can also log in to download restricted content. To do that, log in to FurAffinity in your web browser, export cookies to a file from your web browser in Netscape format (there are extensions to do that [for Firefox](https://addons.mozilla.org/en-US/firefox/addon/ganbo/) and [for Chrome based browsers](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg)), you can then pass them to the script with the `-c` flag, like this (you may also have to provide your user agent):
|
||||
|
||||
`python3 furaffinity-dl.py -c cookies.txt -u 'Mozilla/5.0 ....' gallery letodoesart`
|
||||
`python3 furaffinity-dl.py -c cookies.txt -u 'Mozilla/5.0 ....' gallery letodoesart`
|
||||
|
||||
## TODO
|
||||
|
||||
|
@ -41,4 +47,5 @@ You can also log in to download restricted content. To do that, log in to FurAff
|
|||
- Login without having to export cookies
|
||||
|
||||
## Disclaimer
|
||||
|
||||
It is your own responsibility to check whether batch downloading is allowed by FurAffinity's terms of service and to abide by them. For further disclaimers see LICENSE.
|
||||
|
|
|
@ -1,12 +1,11 @@
|
|||
#!/usr/bin/python3
|
||||
import argparse
|
||||
from tqdm import tqdm
|
||||
from argparse import RawTextHelpFormatter
|
||||
import json
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
import urllib.request
|
||||
import http.cookiejar as cookielib
|
||||
import urllib.parse
|
||||
import re
|
||||
import os
|
||||
|
||||
|
@ -19,7 +18,6 @@ current ideas / things to do:
|
|||
metadata injection (gets messy easily)
|
||||
sqlite database
|
||||
support for classic theme
|
||||
using `requests` instead of `urllib`
|
||||
turn this into a module
|
||||
'''
|
||||
|
||||
|
@ -61,7 +59,7 @@ if bool(re.compile(r'[^a-zA-Z0-9\-~._]').search(args.username)):
|
|||
raise Exception('Username contains non-valid characters', args.username)
|
||||
|
||||
# Initialise a session
|
||||
session = requests.Session()
|
||||
session = requests.session()
|
||||
session.headers.update({'User-Agent': args.ua})
|
||||
|
||||
# Load cookies from a netscape cookie file (if provided)
|
||||
|
@ -74,9 +72,28 @@ base_url = 'https://www.furaffinity.net'
|
|||
gallery_url = '{}/{}/{}'.format(base_url, args.category, args.username)
|
||||
page_num = args.start
|
||||
|
||||
def download_file(url, fname, desc):
    """Stream a file from *url* to the local path *fname* with a progress bar.

    Parameters:
        url:   full URL of the file to fetch (requested via the module-level
               `session`, so its cookies / User-Agent headers apply).
        fname: destination path opened for binary writing.
        desc:  label shown in the tqdm progress bar (padded/truncated to
               exactly 40 characters so bars line up between downloads).

    Returns:
        True on success, False when the server answers with a non-200
        status (the download is skipped, not retried).
    """
    # stream=True avoids loading the whole file into memory at once.
    r = session.get(url, stream=True)
    if r.status_code != 200:
        print("Got a HTTP {} while downloading; skipping".format(r.status_code))
        return False

    # Content-Length may be absent; tqdm treats total=0 as "unknown size".
    total = int(r.headers.get('Content-Length', 0))
    with open(fname, 'wb') as file, tqdm(
        desc=desc.ljust(40)[:40],
        total=total,
        miniters=100,
        unit='b',
        unit_scale=True,
        unit_divisor=1024
    ) as bar:
        # Write in 1 KiB chunks, advancing the bar by the bytes written
        # (file.write returns the byte count) rather than bytes received.
        for data in r.iter_content(chunk_size=1024):
            size = file.write(data)
            bar.update(size)
    return True
|
||||
|
||||
# The cursed function that handles downloading
|
||||
def download_file(path):
|
||||
def download(path):
|
||||
page_url = '{}{}'.format(base_url, path)
|
||||
response = session.get(page_url)
|
||||
s = BeautifulSoup(response.text, 'html.parser')
|
||||
|
@ -111,7 +128,7 @@ def download_file(path):
|
|||
temp_ele = comment.find(class_='comment-parent')
|
||||
parent_cid = None if temp_ele is None else int(temp_ele.attrs.get('href')[5:])
|
||||
|
||||
# Comment deleted or hidden
|
||||
# Comment is deleted or hidden
|
||||
if comment.find(class_='comment-link') is None:
|
||||
continue
|
||||
|
||||
|
@ -123,21 +140,11 @@ def download_file(path):
|
|||
'date': comment.find(class_='popup_date').attrs.get('title')
|
||||
})
|
||||
|
||||
print('Downloading "{}"... '.format(title))
|
||||
|
||||
# Because for some god forsaken reason FA keeps the original filename in the upload, in the case that it contains non-ASCII
|
||||
# characters it can make this thing blow up. So we have to do some annoying IRI stuff to make it work. Maybe consider `requests`
|
||||
# instead of `urllib`
|
||||
def strip_non_ascii(s): return ''.join(i for i in s if ord(i) < 128)
|
||||
url = 'https:{}'.format(image)
|
||||
url = urllib.parse.urlsplit(url)
|
||||
url = list(url)
|
||||
url[2] = urllib.parse.quote(url[2])
|
||||
url = urllib.parse.urlunsplit(url)
|
||||
try:
|
||||
urllib.request.urlretrieve(url, os.path.join(args.output, strip_non_ascii(filename)))
|
||||
except urllib.error.HTTPError:
|
||||
print("404 Not Found, skipping")
|
||||
url ='https:{}'.format(image)
|
||||
output_path = os.path.join(args.output, filename)
|
||||
|
||||
if not download_file(url, output_path, data["title"]):
|
||||
return False
|
||||
|
||||
# Write a UTF-8 encoded JSON file for metadata
|
||||
with open(os.path.join(args.output, '{}.json'.format(filename)), 'w', encoding='utf-8') as f:
|
||||
|
@ -156,7 +163,7 @@ while True:
|
|||
account_username = s.find(class_='loggedin_user_avatar').attrs.get('alt')
|
||||
print('Logged in as', account_username)
|
||||
else:
|
||||
print('Not logged in, some users gallery\'s may be unaccessible and NSFW content is not downloadable')
|
||||
print('Not logged in, NSFW content is unaccessible')
|
||||
|
||||
# System messages
|
||||
if s.find(class_='notice-message') is not None:
|
||||
|
@ -174,7 +181,7 @@ while True:
|
|||
|
||||
# Download all images on the page
|
||||
for img in s.findAll('figure'):
|
||||
download_file(img.find('a').attrs.get('href'))
|
||||
download(img.find('a').attrs.get('href'))
|
||||
|
||||
page_num += 1
|
||||
print('Downloading page', page_num)
|
||||
|
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
|
@ -0,0 +1,3 @@
|
|||
beautifulsoup4
|
||||
requests
|
||||
tqdm
|
Loading…
Add table
Reference in a new issue