mirror of
https://github.com/Radiquum/furaffinity-dl.git
synced 2025-04-04 23:34:37 +00:00
system messages fix
changelog: - new file naming - added possibility to download only recent user submissions (may be useful to update downloaded galleries) - now you can download several users, also you can provide a path to a file with usernames, to download them all - readded custom output path and metadata folder - default args state changes - code cleaning
This commit is contained in:
parent
c4400d4f78
commit
c4a9e4e486
4 changed files with 362 additions and 308 deletions
7
.gitignore
vendored
7
.gitignore
vendored
|
@ -8,9 +8,10 @@ cookies.txt
|
|||
*.gif
|
||||
*.swf
|
||||
|
||||
# Download folder
|
||||
furaffinity-dl/
|
||||
# Default download folder
|
||||
Submissions/
|
||||
|
||||
# vscode stuff
|
||||
.vscode
|
||||
ignore
|
||||
list.txt
|
||||
dl_dev.py
|
||||
|
|
14
README.md
14
README.md
|
@ -24,9 +24,9 @@ When downloading a folder make sure to put everything after **/folder/**, for ex
|
|||
|
||||
```help
|
||||
|
||||
usage: furaffinity-dl.py [-h] [-sub] [-f FOLDER [FOLDER ...]] [-c COOKIES [COOKIES ...]] [-ua USER_AGENT [USER_AGENT ...]] [--start START [START ...]]
|
||||
[--stop STOP [STOP ...]] [--redownload] [--interval INTERVAL [INTERVAL ...]] [--rating] [--filter] [--metadata] [--download DOWNLOAD]
|
||||
[-jd] [--login]
|
||||
usage: furaffinity-dl.py [-h] [-sub] [-f FOLDER [FOLDER ...]] [-c COOKIES [COOKIES ...]] [-ua USER_AGENT [USER_AGENT ...]]
|
||||
[--start START [START ...]] [--stop STOP [STOP ...]] [--redownload] [--interval INTERVAL [INTERVAL ...]] [--rating]
|
||||
[--filter] [--metadata] [--download DOWNLOAD] [-jd] [--login] [--check] [--output]
|
||||
[username] [category]
|
||||
|
||||
Downloads the entire gallery/scraps/folder/favorites of a furaffinity user, or your submissions notifications
|
||||
|
@ -52,18 +52,24 @@ options:
|
|||
--interval INTERVAL [INTERVAL ...], -i INTERVAL [INTERVAL ...]
|
||||
delay between downloading pages in seconds [default: 0]
|
||||
--rating, -r disable rating separation
|
||||
--filter disable submission filter
|
||||
--filter enable submission filter
|
||||
--metadata, -m enable metadata saving
|
||||
--download DOWNLOAD download a specific submission /view/12345678/
|
||||
-jd, --json-description
|
||||
download description as a JSON list
|
||||
--login extract furaffinity cookies directly from your browser
|
||||
--check check and download latest submissions of a user
|
||||
--output, -o set a custom output folder
|
||||
|
||||
Examples:
|
||||
python3 furaffinity-dl.py koul -> will download gallery of user koul
|
||||
python3 furaffinity-dl.py koul scraps -> will download scraps of user koul
|
||||
python3 furaffinity-dl.py mylafox favorites -> will download favorites of user mylafox
|
||||
|
||||
You can also download several users in one go like this:
|
||||
python3 furaffinity-dl.py "koul radiquum mylafox" -> will download gallery of users koul -> radiquum -> mylafox
|
||||
You can also provide a file with user names that are separated by a new line
|
||||
|
||||
You can also log in to FurAffinity in a web browser and load cookies to download age restricted content or submissions:
|
||||
python3 furaffinity-dl.py letodoesart -c cookies.txt -> will download gallery of user letodoesart including Mature and Adult submissions
|
||||
python3 furaffinity-dl.py --submissions -c cookies.txt -> will download your submissions notifications
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#!/usr/bin/python3
|
||||
import argparse
|
||||
import contextlib
|
||||
import http.cookiejar as cookielib
|
||||
import json
|
||||
import os
|
||||
|
@ -8,8 +9,10 @@ from time import sleep
|
|||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from pathvalidate import sanitize_filename
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
# COLORS
|
||||
WHITE = "\033[1;37m"
|
||||
RED = "\033[1;91m"
|
||||
|
@ -20,33 +23,54 @@ END = "\033[0m"
|
|||
# Argument parsing
|
||||
parser = argparse.ArgumentParser(
|
||||
formatter_class=argparse.RawTextHelpFormatter,
|
||||
description="Downloads the entire gallery/scraps/folder/favorites of a furaffinity user, or your submissions notifications",
|
||||
description="Downloads the entire gallery/scraps/folder/favorites \
|
||||
of a furaffinity user, or your submissions notifications",
|
||||
epilog="""
|
||||
Examples:
|
||||
python3 furaffinity-dl.py koul -> will download gallery of user koul
|
||||
python3 furaffinity-dl.py koul scraps -> will download scraps of user koul
|
||||
python3 furaffinity-dl.py mylafox favorites -> will download favorites of user mylafox \n
|
||||
You can also log in to FurAffinity in a web browser and load cookies to download age restricted content or submissions:
|
||||
python3 furaffinity-dl.py letodoesart -c cookies.txt -> will download gallery of user letodoesart including Mature and Adult submissions
|
||||
python3 furaffinity-dl.py --submissions -c cookies.txt -> will download your submissions notifications \n
|
||||
DISCLAIMER: It is your own responsibility to check whether batch downloading is allowed by FurAffinity terms of service and to abide by them.
|
||||
python3 furaffinity-dl.py mylafox favorites -> will download favorites \
|
||||
of user mylafox \n
|
||||
You can also download a several users in one go like this:
|
||||
python3 furaffinity-dl.py "koul radiquum mylafox" \
|
||||
-> will download gallery of users koul -> radiquum -> mylafox
|
||||
You can also provide a file with user names that are separated by a new line\n
|
||||
You can also log in to FurAffinity in a web browser and load cookies to \
|
||||
download age restricted content or submissions:
|
||||
python3 furaffinity-dl.py letodoesart -c cookies.txt -> will download \
|
||||
gallery of user letodoesart including Mature and Adult submissions
|
||||
python3 furaffinity-dl.py --submissions -c cookies.txt -> will download your \
|
||||
submissions notifications \n
|
||||
DISCLAIMER: It is your own responsibility to check whether batch downloading \
|
||||
is allowed by FurAffinity terms of service and to abide by them.
|
||||
""",
|
||||
)
|
||||
parser.add_argument("username", nargs="?", help="username of the furaffinity user")
|
||||
parser.add_argument(
|
||||
"username",
|
||||
nargs="?",
|
||||
help="username of the furaffinity \
|
||||
user",
|
||||
)
|
||||
parser.add_argument(
|
||||
"category",
|
||||
nargs="?",
|
||||
help="the category to download, gallery/scraps/favorites [default: gallery]",
|
||||
help="the category to download, gallery/scraps/favorites \
|
||||
[default: gallery]",
|
||||
default="gallery",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-sub", "--submissions", action="store_true", help="download your submissions"
|
||||
"-sub",
|
||||
"--submissions",
|
||||
action="store_true",
|
||||
help="download your \
|
||||
submissions",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-f",
|
||||
"--folder",
|
||||
nargs="+",
|
||||
help="full path of the furaffinity gallery folder. for instance 123456/Folder-Name-Here",
|
||||
help="full path of the furaffinity gallery folder. for instance 123456/\
|
||||
Folder-Name-Here",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-c", "--cookies", nargs="+", help="path to a NetScape cookies file"
|
||||
|
@ -57,7 +81,8 @@ parser.add_argument(
|
|||
dest="user_agent",
|
||||
nargs="+",
|
||||
default=[
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 Firefox/101.0"
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 \
|
||||
Firefox/101.0"
|
||||
],
|
||||
help="Your browser's useragent, may be required, depending on your luck",
|
||||
)
|
||||
|
@ -69,7 +94,9 @@ parser.add_argument(
|
|||
"-S",
|
||||
default=[0],
|
||||
nargs="+",
|
||||
help="Page number to stop on. Specify the full URL after the username: for favorites pages (1234567890/next) or for submissions pages: (new~123456789@48)",
|
||||
help="Page number to stop on. Specify the full URL after the username: for \
|
||||
favorites pages (1234567890/next) or for submissions pages: \
|
||||
(new~123456789@48)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--redownload",
|
||||
|
@ -94,8 +121,8 @@ parser.add_argument(
|
|||
)
|
||||
parser.add_argument(
|
||||
"--filter",
|
||||
action="store_false",
|
||||
help="disable submission filter",
|
||||
action="store_true",
|
||||
help="enable submission filter",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--metadata",
|
||||
|
@ -119,30 +146,25 @@ parser.add_argument(
|
|||
action="store_true",
|
||||
help="extract furaffinity cookies directly from your browser",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--check",
|
||||
action="store_true",
|
||||
help="check and download latest submissions of a user",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output",
|
||||
"-o",
|
||||
dest="output_folder",
|
||||
default="Submissions",
|
||||
help="set a custom output folder",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
BASE_URL = "https://www.furaffinity.net"
|
||||
username = args.username
|
||||
|
||||
if (
|
||||
args.submissions is False and args.login is False and args.download is None
|
||||
): # check if you are not downloading submissions or a specific post
|
||||
categories = {
|
||||
"gallery": "gallery",
|
||||
"scraps": "scraps",
|
||||
"favorites": "favorites",
|
||||
}
|
||||
category = categories.get(args.category)
|
||||
if args.username is None:
|
||||
print(f"{RED}<!> please enter a FA Username{END}")
|
||||
exit()
|
||||
if category is None:
|
||||
print(f"{RED}<!> please enter a valid category gallery/scraps/favorites{END}")
|
||||
exit()
|
||||
|
||||
download_url = f"{BASE_URL}/{category}/{username}"
|
||||
output = f"furaffinity-dl/{category}/{username}"
|
||||
if args.username != None:
|
||||
username = args.username.split(" ")
|
||||
category = args.category
|
||||
|
||||
# get session
|
||||
session = requests.session()
|
||||
|
@ -153,7 +175,7 @@ if args.cookies is not None: # add cookies if present
|
|||
cookies.load()
|
||||
session.cookies = cookies
|
||||
|
||||
# File downloading
|
||||
# Functions
|
||||
|
||||
|
||||
def download_file(url, fname, desc):
|
||||
|
@ -161,13 +183,14 @@ def download_file(url, fname, desc):
|
|||
r = session.get(url, stream=True)
|
||||
if r.status_code != 200:
|
||||
print(
|
||||
f"{RED}<!> Got a HTTP {r.status_code} while downloading {fname}; ...skipping{END}"
|
||||
f'{RED}Got a HTTP {r.status_code} while downloading \
|
||||
"{fname}". URL {url} ...skipping{END}'
|
||||
)
|
||||
return False
|
||||
|
||||
total = int(r.headers.get("Content-Length", 0))
|
||||
with open(fname, "wb") as file, tqdm(
|
||||
desc=desc.ljust(40)[:40],
|
||||
desc=desc.ljust(40),
|
||||
total=total,
|
||||
miniters=100,
|
||||
unit="b",
|
||||
|
@ -178,282 +201,105 @@ def download_file(url, fname, desc):
|
|||
size = file.write(data)
|
||||
bar.update(size)
|
||||
except KeyboardInterrupt:
|
||||
print(f"{GREEN}<i> Finished downloading{END}")
|
||||
print(f"{GREEN}Finished downloading{END}")
|
||||
os.remove(fname)
|
||||
exit()
|
||||
|
||||
return True
|
||||
|
||||
|
||||
# checks if you are downloading a folder or submissions
|
||||
if args.folder is not None:
|
||||
folder = args.folder[0].split("/")
|
||||
download_url = f"{BASE_URL}/gallery/{username}/folder/{args.folder[0]}"
|
||||
output = f"furaffinity-dl/folders/{username}/{folder[1]}"
|
||||
if args.submissions is True:
|
||||
download_url = f"{BASE_URL}/msg/submissions"
|
||||
|
||||
|
||||
def download(path):
|
||||
response = session.get(f"{BASE_URL}{path}")
|
||||
s = BeautifulSoup(response.text, "html.parser")
|
||||
|
||||
# System messages
|
||||
if s.find(class_="notice-message") is not None:
|
||||
try:
|
||||
message = (
|
||||
s.find(class_="notice-message")
|
||||
.find("div")
|
||||
.find(class_="link-override")
|
||||
.text.strip()
|
||||
.replace(".", ". \n")
|
||||
)
|
||||
except AttributeError:
|
||||
message = (
|
||||
s.find(class_="notice-message")
|
||||
.find("div", class_="section-body alignleft")
|
||||
.find("div", class_="redirect-message")
|
||||
.text.strip()
|
||||
.replace(".", ". \n")
|
||||
)
|
||||
print(f"{YELLOW}<i> System Message: {message}{END}")
|
||||
exit()
|
||||
|
||||
image = s.find(class_="download").find("a").attrs.get("href")
|
||||
title = f' {s.find(class_="submission-title").find("p").contents[0]} '
|
||||
description = (
|
||||
s.find(class_="submission-description").text.strip().replace("\r\n", "\n")
|
||||
)
|
||||
|
||||
if args.json_description is True:
|
||||
description = []
|
||||
filename = image.split("/")[-1:][0]
|
||||
data = {
|
||||
"id": int(path.split("/")[-2:-1][0]),
|
||||
"filename": filename,
|
||||
"author": s.find(class_="submission-id-sub-container")
|
||||
.find("a")
|
||||
.find("strong")
|
||||
.text,
|
||||
"date": s.find(class_="popup_date").attrs.get("title"),
|
||||
"title": title,
|
||||
"description": description,
|
||||
"url": f"{BASE_URL}{path}",
|
||||
"tags": [],
|
||||
"category": s.find(class_="info").find(class_="category-name").text,
|
||||
"type": s.find(class_="info").find(class_="type-name").text,
|
||||
"species": s.find(class_="info").findAll("div")[2].find("span").text,
|
||||
"gender": s.find(class_="info").findAll("div")[3].find("span").text,
|
||||
"views": int(s.find(class_="views").find(class_="font-large").text),
|
||||
"favorites": int(s.find(class_="favorites").find(class_="font-large").text),
|
||||
"rating": s.find(class_="rating-box").text.strip(),
|
||||
"comments": [],
|
||||
}
|
||||
|
||||
if args.submissions is True or args.download is not None:
|
||||
global output
|
||||
output = f"furaffinity-dl/gallery/{data.get('author')}"
|
||||
|
||||
if args.filter is True:
|
||||
match = re.search(
|
||||
'YCH[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*YCH|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*ABLE|AVAIL[a-z $-/:-?{-~!"^_`\\[\\]]*YCH|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLONE|CLONE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*LIM|LIM[a-z $-/:-?{-~!"^_`\\[\\]]*YCH|COM[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*COM|COM[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*COM|FIX[a-z $-/:-?{-~!"^_`\\[\\]]*ICE|REM[insder]*\\W|\\bREF|\\bSale\\W|auction|multislot|stream|adopt',
|
||||
title,
|
||||
re.IGNORECASE,
|
||||
def system_message_handler(s):
    """Print the system notice found on a page and abort the current run.

    Args:
        s: parsed page (BeautifulSoup document) that is expected to contain
            an element with the ``notice-message`` class.

    Raises:
        System_Message: always, once the notice text has been printed.
    """
    notice = s.find(class_="notice-message")
    try:
        # A plain string is required here: wrapping the expression in braces
        # would build a set and print it as "{'text'}".
        message = notice.find("div").find(class_="link-override").text.strip()
    except AttributeError:
        try:
            # Second known markup variant: a redirect message nested inside
            # the section body.
            message = (
                s.find("section", class_="aligncenter notice-message")
                .find("div", class_="section-body alignleft")
                .find("div", class_="redirect-message")
                .text.strip()
            )
        except AttributeError:
            # Neither variant matched: fall back to the raw notice text
            # instead of crashing while reporting the message.
            message = notice.text.strip() if notice is not None else ""

    print(f"{YELLOW}System Message: {message}{END}")
    raise System_Message
|
||||
|
||||
image_url = f"https:{image}"
|
||||
|
||||
os.makedirs(output, exist_ok=True)
|
||||
global output_path
|
||||
output_path = f"{output}/{filename}"
|
||||
def check_filter(title, url):
    """Tell whether a submission title matches the advert/announcement filter.

    The filter looks for keyword pairs such as "YCH ... OPEN" or
    "COM ... CLOSE" and for single keywords such as "auction" or "stream",
    ignoring case.

    Args:
        title: submission title to test.
        url: submission URL, only used in the printed notice.

    Returns:
        bool: True when the title matched (a notice is printed), else False.
    """
    # Characters allowed between the two keywords of a pair: letters, spaces
    # and most ASCII punctuation (digits are not part of the class).
    gap = r'[a-z $-/:-?{-~!"^_`\[\]]*'
    keyword_pairs = (
        ("YCH", "OPEN"),
        ("OPEN", "YCH"),
        ("YCH", "CLOSE"),
        ("CLOSE", "YCH"),
        ("YCH", "ABLE"),
        ("AVAIL", "YCH"),
        ("YCH", "CLONE"),
        ("CLONE", "YCH"),
        ("YCH", "LIM"),
        ("LIM", "YCH"),
        ("COM", "OPEN"),
        ("OPEN", "COM"),
        ("COM", "CLOSE[^r]"),
        ("CLOSE", "COM"),
        ("FIX", "ICE"),
        ("TELEGRAM", "STICK"),
        ("TG", "STICK"),
    )
    single_keywords = (
        r"REM[insder]*\b",
        r"\bREF",
        r"\bSale",
        "auction",
        "multislot",
        "stream",
        "adopt",
    )
    # Joining the alternatives explicitly keeps stray whitespace out of the
    # pattern, which backslash-continued string lines cannot guarantee.
    alternatives = [f"{first}{gap}{second}" for first, second in keyword_pairs]
    alternatives.extend(single_keywords)
    search = "|".join(alternatives)

    if re.search(search, title, re.IGNORECASE) is None:
        return False

    print(
        f'{YELLOW}"{title}" was filtered and will not be '
        f"downloaded - {url}{END}"
    )
    return True
|
||||
|
||||
|
||||
def create_metadata(output, data, s, title, filename):
|
||||
os.makedirs(f'{output}/metadata', exist_ok=True)
|
||||
metadata = f"{output}/metadata/{title} - {filename}"
|
||||
if args.rating is True:
|
||||
os.makedirs(f'{output}/{data.get("rating")}', exist_ok=True)
|
||||
output_path = f'{output}/{data.get("rating")}/{filename}'
|
||||
os.makedirs(f'{output}/{data.get("rating")}/metadata', exist_ok=True)
|
||||
metadata = f'{output}/{data.get("rating")}/metadata/{title} - {filename}'
|
||||
|
||||
if args.dont_redownload is True and os.path.isfile(output_path):
|
||||
print(f"{YELLOW}<i> Skipping:{title} since it's already downloaded{END}")
|
||||
else:
|
||||
download_file(image_url, output_path, title)
|
||||
# Extract description as list
|
||||
if args.json_description is True:
|
||||
for desc in s.find("div", class_="submission-description").stripped_strings:
|
||||
data["description"].append(desc)
|
||||
|
||||
if args.metadata is True:
|
||||
# Extract tags
|
||||
|
||||
metadata = output_path
|
||||
try:
|
||||
for tag in s.find(class_="tags-row").findAll(class_="tags"):
|
||||
data["tags"].append(tag.find("a").text)
|
||||
except AttributeError:
|
||||
print(f'{YELLOW}"{title}" has no tags{END}')
|
||||
|
||||
# Extract description as list
|
||||
if args.json_description is True:
|
||||
for desc in s.find("div", class_="submission-description").stripped_strings:
|
||||
# Extract comments
|
||||
for comment in s.findAll(class_="comment_container"):
|
||||
temp_ele = comment.find(class_="comment-parent")
|
||||
parent_cid = None if temp_ele is None else int(temp_ele.attrs.get("href")[5:])
|
||||
# Comment is deleted or hidden
|
||||
if comment.find(class_="comment-link") is None:
|
||||
continue
|
||||
|
||||
if re.search("<", desc) is True:
|
||||
desc = desc.replace("<", "")
|
||||
data["comments"].append(
|
||||
{
|
||||
"cid": int(comment.find(class_="comment-link").attrs.get("href")[5:]),
|
||||
"parent_cid": parent_cid,
|
||||
"content": comment.find(class_="comment_text").contents[0].strip(),
|
||||
"username": comment.find(class_="comment_username").text,
|
||||
"date": comment.find(class_="popup_date").attrs.get("title"),
|
||||
}
|
||||
)
|
||||
|
||||
if re.search(">", desc) is True:
|
||||
desc = desc.replace(">", "")
|
||||
|
||||
if re.search("/", desc) is True:
|
||||
desc = desc.replace("/", "")
|
||||
|
||||
data["description"].append(desc)
|
||||
|
||||
# Extract tags
|
||||
|
||||
try:
|
||||
for tag in s.find(class_="tags-row").findAll(class_="tags"):
|
||||
data["tags"].append(tag.find("a").text)
|
||||
except AttributeError:
|
||||
print(f"{YELLOW}<i> post:{title} has no tags{END}")
|
||||
|
||||
# Extract comments
|
||||
for comment in s.findAll(class_="comment_container"):
|
||||
temp_ele = comment.find(class_="comment-parent")
|
||||
parent_cid = (
|
||||
None if temp_ele is None else int(temp_ele.attrs.get("href")[5:])
|
||||
)
|
||||
# Comment is deleted or hidden
|
||||
if comment.find(class_="comment-link") is None:
|
||||
continue
|
||||
|
||||
data["comments"].append(
|
||||
{
|
||||
"cid": int(
|
||||
comment.find(class_="comment-link").attrs.get("href")[5:]
|
||||
),
|
||||
"parent_cid": parent_cid,
|
||||
"content": comment.find(class_="comment_text").contents[0].strip(),
|
||||
"username": comment.find(class_="comment_username").text,
|
||||
"date": comment.find(class_="popup_date").attrs.get("title"),
|
||||
}
|
||||
)
|
||||
|
||||
# Write a UTF-8 encoded JSON file for metadata
|
||||
with open(f"{metadata}.json", "w", encoding="utf-8") as f:
|
||||
json.dump(data, f, ensure_ascii=False, indent=4)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
if args.download is not None:
|
||||
download(args.download)
|
||||
print(f"{GREEN}<i> File saved as {output_path} {END}")
|
||||
exit()
|
||||
|
||||
# Main function
|
||||
|
||||
|
||||
def main():
|
||||
# check if you are logged in
|
||||
page_end = args.stop[0]
|
||||
page_num = args.start[0]
|
||||
page_url = f"{download_url}/{page_num}"
|
||||
response = session.get(page_url)
|
||||
s = BeautifulSoup(response.text, "html.parser")
|
||||
if s.find(class_="loggedin_user_avatar") is not None:
|
||||
account_username = s.find(class_="loggedin_user_avatar").attrs.get("alt")
|
||||
print(f"{GREEN}<i> Logged in as: {account_username}{END}")
|
||||
else:
|
||||
print(f"{YELLOW}<i> Not logged in, NSFW content is inaccessible{END}")
|
||||
|
||||
# download loop
|
||||
while True:
|
||||
if page_end == page_num:
|
||||
print(f"{YELLOW}<i> Reached page {page_end}, stopping.{END}")
|
||||
break
|
||||
|
||||
page_url = f"{download_url}/{page_num}"
|
||||
response = session.get(page_url)
|
||||
s = BeautifulSoup(response.text, "html.parser")
|
||||
|
||||
# System messages
|
||||
if s.find(class_="notice-message") is not None:
|
||||
try:
|
||||
message = (
|
||||
s.find(class_="notice-message")
|
||||
.find("div")
|
||||
.find(class_="link-override")
|
||||
.text.strip()
|
||||
.replace(".", ". \n")
|
||||
)
|
||||
except AttributeError:
|
||||
message = (
|
||||
s.find(class_="notice-message")
|
||||
.find("div", class_="section-body alignleft")
|
||||
.find("div", class_="redirect-message")
|
||||
.text.strip()
|
||||
.replace(".", ". \n")
|
||||
)
|
||||
print(f"{YELLOW}<i> System Message: {message}{END}")
|
||||
exit()
|
||||
|
||||
# End of gallery
|
||||
if s.find(id="no-images") is not None:
|
||||
print(f"{GREEN}<i> End of gallery{END}")
|
||||
break
|
||||
|
||||
# Download all images on the page
|
||||
for img in s.findAll("figure"):
|
||||
download(img.find("a").attrs.get("href"))
|
||||
sleep(args.interval[0])
|
||||
|
||||
# Download submissions
|
||||
if args.submissions is True:
|
||||
try:
|
||||
next_button = s.find("a", class_="button standard more").attrs.get(
|
||||
"href"
|
||||
)
|
||||
except AttributeError:
|
||||
try:
|
||||
next_button = s.find(
|
||||
"a", class_="button standard more-half"
|
||||
).attrs.get("href")
|
||||
except AttributeError:
|
||||
print(f"{YELLOW}<!> Unable to find next button{END}")
|
||||
break
|
||||
|
||||
# unlike galleries that are sequentially numbered, submissions use a different scheme.
|
||||
# the "page_num" is instead: new~[set of numbers]@(12 or 48 or 72) if sorting by new
|
||||
|
||||
page_num = next_button.split("/")[-2]
|
||||
page_url = f"{BASE_URL}{next_button}"
|
||||
|
||||
print(f"{WHITE}<i> Downloading page {page_num} - {page_url} {END}")
|
||||
# Download everything else
|
||||
elif args.category != "favorites":
|
||||
next_button = s.find("button", class_="button standard", text="Next")
|
||||
if next_button is None or next_button.parent is None:
|
||||
print(f"{YELLOW}<!> Unable to find next button{END}")
|
||||
break
|
||||
|
||||
page_num = next_button.parent.attrs["action"].split("/")[-2]
|
||||
|
||||
print(f"{WHITE}<i> Downloading page {page_num} - {page_url} {END}")
|
||||
# Download favorites
|
||||
else:
|
||||
next_button = s.find("a", class_="button standard right", text="Next")
|
||||
if next_button is None:
|
||||
print(f"{YELLOW}<!> Unable to find next button{END}")
|
||||
break
|
||||
|
||||
# unlike galleries that are sequentially numbered, favorites use a different scheme.
|
||||
# the "page_num" is instead: [set of numbers]/next (the trailing /next is required)
|
||||
|
||||
next_page_link = next_button.attrs["href"]
|
||||
next_fav_num = re.search(r"\d+", next_page_link)
|
||||
|
||||
if next_fav_num is None:
|
||||
print(f"{YELLOW}<!> Failed to parse next favorite link{END}")
|
||||
break
|
||||
|
||||
page_num = next_fav_num.group(0) + "/next"
|
||||
|
||||
# parse it into numbers/next
|
||||
|
||||
print(f"{WHITE}<i> Downloading page {page_num} - {page_url} {END}")
|
||||
|
||||
print(f"{GREEN}Finished downloading{END}")
|
||||
# Write a UTF-8 encoded JSON file for metadata
|
||||
with open(f"{metadata}.json", "w", encoding="utf-8") as f:
|
||||
json.dump(data, f, ensure_ascii=False, indent=4)
|
||||
|
||||
|
||||
def login():
|
||||
|
@ -470,7 +316,7 @@ def login():
|
|||
try:
|
||||
s.find(class_="loggedin_user_avatar")
|
||||
account_username = s.find(class_="loggedin_user_avatar").attrs.get("alt")
|
||||
print(f"{GREEN}<i> Logged in as: {account_username}{END}")
|
||||
print(f"{GREEN}Logged in as: {account_username}{END}")
|
||||
with open("cookies.txt", "w") as file:
|
||||
file.write(
|
||||
f"""# Netscape HTTP Cookie File
|
||||
|
@ -480,18 +326,218 @@ def login():
|
|||
.furaffinity.net TRUE / TRUE {cookie_b.expires} b {cookie_b.value}"""
|
||||
)
|
||||
print(
|
||||
f'{GREEN}<i> cookies saved successfully, now you can provide them by using "-c cookies.txt"{END}'
|
||||
f'{GREEN}cookies saved successfully, now you can provide them \
|
||||
by using "-c cookies.txt"{END}'
|
||||
)
|
||||
except AttributeError:
|
||||
print(
|
||||
f"{RED}<i> Error getting cookies, either you need to login into furaffinity in your browser, or you can export cookies.txt manually{END}"
|
||||
f"{RED}Error getting cookies, either you need to login into \
|
||||
furaffinity in your browser, or you can export cookies.txt manually{END}"
|
||||
)
|
||||
|
||||
exit()
|
||||
|
||||
|
||||
# File downloading
|
||||
|
||||
|
||||
class Check_Complete(Exception):
    """Signal that a --check run reached an already-downloaded submission."""
|
||||
|
||||
class System_Message(Exception):
    """Signal that the site answered with a system notice instead of content."""
|
||||
|
||||
|
||||
def download(path):
    """Download a single submission and, if requested, its metadata.

    Args:
        path: site-relative submission path, e.g. ``/view/12345678/``.

    Returns:
        bool: True on every non-raising path (filtered, skipped or downloaded).

    Raises:
        System_Message: raised by system_message_handler() when the page
            carries a system notice.
        Check_Complete: when --check is set and the target file already exists.
    """
    # The caller prints this path after a --download run, so it is published
    # as a module-level name.
    global output_path

    response = session.get(f"{BASE_URL}{path}")
    s = BeautifulSoup(response.text, "html.parser")

    # System messages
    if s.find(class_="notice-message") is not None:
        system_message_handler(s)

    image = s.find(class_="download").find("a").attrs.get("href")
    title = s.find(class_="submission-title").find("p").contents[0]
    title = sanitize_filename(title)
    dsc = s.find(class_="submission-description").text.strip().replace("\r\n", "\n")

    if args.json_description is True:
        # create_metadata() fills this list with the description fragments
        dsc = []
    filename = image.split("/")[-1]
    info = s.find(class_="info")
    data = {
        "id": int(path.split("/")[-2]),
        "filename": filename,
        "author": s.find(class_="submission-id-sub-container")
        .find("a")
        .find("strong")
        .text,
        "date": s.find(class_="popup_date").attrs.get("title"),
        "title": title,
        "description": dsc,
        "url": f"{BASE_URL}{path}",
        "tags": [],
        "category": info.find(class_="category-name").text,
        "type": info.find(class_="type-name").text,
        "species": info.findAll("div")[2].find("span").text,
        "gender": info.findAll("div")[3].find("span").text,
        "views": int(s.find(class_="views").find(class_="font-large").text),
        "favorites": int(s.find(class_="favorites").find(class_="font-large").text),
        "rating": s.find(class_="rating-box").text.strip(),
        "comments": [],
    }
    author = data.get("author")

    image_url = f"https:{image}"
    output = f"{args.output_folder}/{author}"
    # args.category is read directly so that runs without a username
    # (--submissions, --download) do not depend on a module-level alias.
    if args.category != "gallery":
        output = f"{args.output_folder}/{author}/{args.category}"
    if args.folder is not None:
        output = f"{args.output_folder}/{author}/{folder[1]}"

    filename = sanitize_filename(filename)
    target_dir = output
    if args.rating is True:
        target_dir = f'{output}/{data.get("rating")}'
    output_path = f"{target_dir}/{title} - {filename}"

    # Honour the filter verdict: a filtered submission must not be fetched.
    if args.filter is True and check_filter(title, data.get("url")):
        return True

    os.makedirs(target_dir, exist_ok=True)

    if args.dont_redownload is True and os.path.isfile(output_path):
        if args.check is True:
            print(f'{GREEN} Downloaded all recent files of "{author}"{END}')
            raise Check_Complete
        print(f'{YELLOW}Skipping "{title}" since it\'s already downloaded{END}')
        return True

    download_file(image_url, output_path, f'{title} - [{data.get("rating")}]')

    if args.metadata is True:
        create_metadata(output, data, s, title, filename)
    return True
|
||||
|
||||
|
||||
# Main function
|
||||
|
||||
|
||||
def main():
    """Walk the listing pages under ``download_url`` and download each entry.

    Reads the module-level ``download_url`` and the parsed ``args``. Paging
    stops when the stop page is reached, the gallery ends, or no "next"
    control is found. Check_Complete and System_Message raised while
    downloading end the loop quietly.
    """
    page_end = args.stop[0]
    page_num = args.start[0]

    # download loop
    with contextlib.suppress(Check_Complete, System_Message):
        while True:
            # Compare as text: page ids parsed from the site are strings while
            # argparse defaults are ints (presumably CLI values are strings
            # too, verify against the parser definition).
            if str(page_end) == str(page_num):
                print(f"{YELLOW}Reached page {page_end}, stopping.{END}")
                break

            page_url = f"{download_url}/{page_num}"
            response = session.get(page_url)
            s = BeautifulSoup(response.text, "html.parser")

            # System messages
            if s.find(class_="notice-message") is not None:
                system_message_handler(s)

            # End of gallery
            if s.find(id="no-images") is not None:
                print(f"{GREEN}End of gallery{END}")
                break

            # Download all images on the page
            for img in s.findAll("figure"):
                download(img.find("a").attrs.get("href"))
                sleep(args.interval[0])

            # Download submissions
            if args.submissions is True:
                try:
                    next_button = s.find("a", class_="button standard more").attrs.get(
                        "href"
                    )
                except AttributeError:
                    try:
                        next_button = s.find(
                            "a", class_="button standard more-half"
                        ).attrs.get("href")
                    except AttributeError:
                        print(f"{YELLOW}Unable to find next button{END}")
                        break
                # unlike galleries that are sequentially numbered, submissions
                # use a different scheme. the "page_num" is instead:
                # new~[set of numbers]@(12 or 48 or 72) if sorting by new
                page_num = next_button.split("/")[-2]
                page_url = f"{BASE_URL}{next_button}"

            elif args.category != "favorites":
                next_button = s.find("button", class_="button standard", text="Next")
                if next_button is None or next_button.parent is None:
                    print(f"{YELLOW}Unable to find next button{END}")
                    break

                page_num = next_button.parent.attrs["action"].split("/")[-2]
                # Point the progress message at the page about to be fetched,
                # not at the one that was just processed.
                page_url = f"{download_url}/{page_num}"

            else:
                next_button = s.find("a", class_="button standard right", text="Next")
                if next_button is None:
                    print(f"{YELLOW}Unable to find next button{END}")
                    break

                # unlike galleries that are sequentially numbered, favorites
                # use a different scheme. the "page_num" is instead:
                # [set of numbers]/next (the trailing /next is required)
                next_page_link = next_button.attrs["href"]
                next_fav_num = re.search(r"\d+", next_page_link)

                if next_fav_num is None:
                    print(f"{YELLOW}Failed to parse next favorite link{END}")
                    break

                page_num = f"{next_fav_num[0]}/next"
                page_url = f"{download_url}/{page_num}"

            print(f"{WHITE}Downloading page {page_num} - {page_url} {END}")

    print(f"{GREEN}Finished downloading{END}")
|
||||
|
||||
|
||||
# Script entry point: pick the run mode from the parsed arguments.
# The assignments below stay at module level on purpose, because main() and
# download() read download_url, folder and output_path as globals.
if __name__ == "__main__":
    if args.login is True:
        login()

    # Report whether the session carries a logged-in account
    response = session.get(BASE_URL)
    s = BeautifulSoup(response.text, "html.parser")
    avatar = s.find(class_="loggedin_user_avatar")
    if avatar is not None:
        account_username = avatar.attrs.get("alt")
        print(f'{GREEN}Logged in as "{account_username}"{END}')
    else:
        print(f"{YELLOW}Not logged in, NSFW content is inaccessible{END}")

    # Single submission
    if args.download is not None:
        download(args.download)
        print(f'{GREEN}File saved as "{output_path}" {END}')
        exit()

    # Submission notifications of the logged-in account
    if args.submissions is True:
        download_url = f"{BASE_URL}/msg/submissions"
        main()
        exit()

    # Every remaining mode needs at least one username; fail with a message
    # instead of a TypeError on username[0].
    if args.username is None:
        print(f"{RED}please enter a FA Username{END}")
        exit()

    # Gallery folder of the first given user
    if args.folder is not None:
        folder = args.folder[0].split("/")
        download_url = f"{BASE_URL}/gallery/{username[0]}/folder/{args.folder[0]}"
        main()
        exit()

    # The first argument may name a file holding one username per line
    usernames = username
    if os.path.exists(username[0]):
        with open(username[0]) as users_file:
            usernames = [line.strip() for line in users_file if line.strip()]

    for username in usernames:
        print(f'{GREEN}Now downloading "{username}"{END}')
        download_url = f"{BASE_URL}/{category}/{username}"
        main()
|
||||
|
|
|
@ -2,3 +2,4 @@ beautifulsoup4
|
|||
requests
|
||||
tqdm
|
||||
browser-cookie3
|
||||
pathvalidate
|
||||
|
|
Loading…
Add table
Reference in a new issue