Mirror of https://github.com/Radiquum/furaffinity-dl.git (synced 2025-04-06 00:04:38 +00:00)
A 404 does not break the script now
Slight refactor as well
This commit is contained in:
parent e2ff807c40
commit c87a1f5355

1 changed file with 23 additions and 19 deletions
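The key change sits in the -138,7 hunk below: the urllib.request.urlretrieve call is wrapped in a try/except so that a submission whose file has been removed (an HTTP 404) is skipped instead of crashing the whole run. A minimal, self-contained sketch of that pattern, with hypothetical url/dest values standing in for the ones the script builds per submission:

import urllib.request
import urllib.error

url = 'https://example.com/art/12345/image.png'  # hypothetical stand-in
dest = './image.png'                             # hypothetical stand-in

try:
    urllib.request.urlretrieve(url, dest)
except urllib.error.HTTPError:
    # File is gone (or another HTTP error status); skip it and keep going
    print("404 Not Found, skipping")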

@@ -16,10 +16,9 @@ Please refer to LICENSE for licensing conditions.
 current ideas / things to do:
 -r replenish, keep downloading until it finds a already downloaded file
 -n number of posts to download
-file renaming to title
 metadata injection (gets messy easily)
 sqlite database
-support for beta theme
+support for classic theme
 using `requests` instead of `urllib`
 turn this into a module
 '''

@@ -44,7 +43,7 @@ parser.add_argument('-u', metavar='useragent', dest='ua', type=str, default='Moz
 parser.add_argument('-s', metavar='start', dest='start', type=int, default=1, help="page number to start from")
 
 args = parser.parse_args()
-if args.username == None:
+if args.username is None:
     parser.print_help()
     exit()
 
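The "slight refactor" in this and the later hunks replaces == None comparisons with identity checks (is None / is not None), the form PEP 8 recommends: None is a singleton, and == can be redefined by a class's __eq__, so only the identity test is unambiguous. A small illustration:

class AlwaysEqual:
    # Hypothetical class whose __eq__ claims equality with everything
    def __eq__(self, other):
        return True

obj = AlwaysEqual()
print(obj == None)  # True  -- the overridden __eq__ hijacks the comparison
print(obj is None)  # False -- identity cannot be overridden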

@@ -54,13 +53,13 @@ if args.output != '.':
 
 # Check validity of category
 valid_categories = ['gallery', 'favorites', 'scraps']
-if not args.category in valid_categories:
+if args.category not in valid_categories:
     raise Exception('Category is not valid', args.category)
 
 # Check validity of username
 if bool(re.compile(r'[^a-zA-Z0-9\-~._]').search(args.username)):
     raise Exception('Username contains non-valid characters', args.username)
 
 # Initialise a session
 session = requests.Session()
 session.headers.update({'User-Agent': args.ua})
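The category check gets the same treatment: not args.category in valid_categories parses as not (args.category in valid_categories), so the behaviour is unchanged, but not in expresses the test in a single operator. A quick check with a hypothetical invalid value:

valid_categories = ['gallery', 'favorites', 'scraps']
category = 'journal'  # hypothetical value outside the valid set

print(not category in valid_categories)  # True, but reads as two steps
print(category not in valid_categories)  # True -- equivalent and idiomatic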

@@ -72,9 +71,10 @@ if args.cookies != '':
     session.cookies = cookies
 
 base_url = 'https://www.furaffinity.net'
-gallery_url = '{}/gallery/{}'.format(base_url, args.username)
+gallery_url = '{}/{}/{}'.format(base_url, args.category, args.username)
 page_num = args.start
 
+
 # The cursed function that handles downloading
 def download_file(path):
     page_url = '{}{}'.format(base_url, path)
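Folding args.category into the format string means one template now serves all three listings the script accepts; with a hypothetical username the resulting URLs look like this:

base_url = 'https://www.furaffinity.net'
for category in ['gallery', 'favorites', 'scraps']:
    # e.g. https://www.furaffinity.net/scraps/someuser
    print('{}/{}/{}'.format(base_url, category, 'someuser'))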

@@ -82,7 +82,7 @@ def download_file(path):
     s = BeautifulSoup(response.text, 'html.parser')
 
     image = s.find(class_='download').find('a').attrs.get('href')
-    title = s.find(class_='submission-title').find('p').contents[0];
+    title = s.find(class_='submission-title').find('p').contents[0]
     filename = image.split("/")[-1:][0]
     data = {
         'id': int(path.split('/')[-2:-1][0]),

@@ -109,10 +109,10 @@ def download_file(path):
     # Extract comments
     for comment in s.findAll(class_='comment_container'):
         temp_ele = comment.find(class_='comment-parent')
-        parent_cid = None if temp_ele == None else int(temp_ele.attrs.get('href')[5:])
+        parent_cid = None if temp_ele is None else int(temp_ele.attrs.get('href')[5:])
 
         # Comment deleted or hidden
-        if comment.find(class_='comment-link') == None:
+        if comment.find(class_='comment-link') is None:
             continue
 
         data['comments'].append({

@@ -123,10 +123,6 @@ def download_file(path):
             'date': comment.find(class_='popup_date').attrs.get('title')
         })
 
-    # Write a UTF-8 encoded JSON file for metadata
-    with open(os.path.join(args.output, '{}.json'.format(filename)), 'w', encoding='utf-8') as f:
-        json.dump(data, f, ensure_ascii=False, indent=4)
-
     print('Downloading "{}"... '.format(title))
 
     # Because for some god forsaken reason FA keeps the original filename in the upload, in the case that it contains non-ASCII

@@ -138,7 +134,15 @@ def download_file(path):
     url = list(url)
     url[2] = urllib.parse.quote(url[2])
     url = urllib.parse.urlunsplit(url)
-    urllib.request.urlretrieve(url, os.path.join(args.output, strip_non_ascii(filename)))
+    try:
+        urllib.request.urlretrieve(url, os.path.join(args.output, strip_non_ascii(filename)))
+    except urllib.error.HTTPError:
+        print("404 Not Found, skipping")
+
+    # Write a UTF-8 encoded JSON file for metadata
+    with open(os.path.join(args.output, '{}.json'.format(filename)), 'w', encoding='utf-8') as f:
+        json.dump(data, f, ensure_ascii=False, indent=4)
+
 
 # Main downloading loop
 while True:
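One caveat: except urllib.error.HTTPError catches every HTTP error status (403, 500, ...), not only 404, even though the printed message says otherwise. A stricter variant (a sketch of an alternative, not what this commit does) would inspect the status code and re-raise anything else:

import urllib.request
import urllib.error

def fetch(url, dest):
    # Hypothetical helper: download url to dest, skipping only genuine 404s
    try:
        urllib.request.urlretrieve(url, dest)
    except urllib.error.HTTPError as e:
        if e.code == 404:  # HTTPError exposes the HTTP status as .code
            print("404 Not Found, skipping")
        else:
            raise  # propagate other HTTP errors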

@@ -148,23 +152,23 @@ while True:
 
     # Account status
     if page_num == 1:
-        if s.find(class_='loggedin_user_avatar') != None:
+        if s.find(class_='loggedin_user_avatar') is not None:
             account_username = s.find(class_='loggedin_user_avatar').attrs.get('alt')
             print('Logged in as', account_username)
         else:
             print('Not logged in, some users gallery\'s may be unaccessible and NSFW content is not downloadable')
 
     # System messages
-    if s.find(class_='notice-message') != None:
+    if s.find(class_='notice-message') is not None:
         message = s.find(class_='notice-message').find('div')
         for ele in message:
-            if ele.name != None:
+            if ele.name is not None:
                 ele.decompose()
 
         raise Exception('System Message', message.text.strip())
 
     # End of gallery
-    if s.find(id='no-images') != None:
+    if s.find(id='no-images') is not None:
         print('End of gallery')
         break
 