divided into multiple files

This commit is contained in:
Kentai Radiquum 2022-07-06 16:54:34 +05:00
parent c51c87d39b
commit 54478d3ed3
No known key found for this signature in database
GPG key ID: CB1FC16C710DB347
6 changed files with 572 additions and 509 deletions

2
.gitignore vendored
View file

@ -14,4 +14,4 @@ Submissions/
# vscode stuff
.vscode
list.txt
dl_dev.py
__pycache__

170
Modules/config.py Normal file
View file

@ -0,0 +1,170 @@
import argparse
# Command-line interface definition.
# NOTE: the arguments are parsed at import time, so importing this module
# reads sys.argv and may exit the process (e.g. on --help or bad arguments).
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawTextHelpFormatter,
    description="Downloads the entire gallery/scraps/folder/favorites "
    "of a furaffinity user, or your submissions notifications",
    epilog="""
Examples:
python3 furaffinity-dl.py koul -> will download gallery of user koul
python3 furaffinity-dl.py koul scraps -> will download scraps of user koul
python3 furaffinity-dl.py mylafox favorites -> will download favorites \
of user mylafox \n
You can also download a several users in one go like this:
python3 furaffinity-dl.py "koul radiquum mylafox" \
-> will download gallery of users koul -> radiquum -> mylafox
You can also provide a file with user names that are separated by a new line\n
You can also log in to FurAffinity in a web browser and load cookies to \
download age restricted content or submissions:
python3 furaffinity-dl.py letodoesart -c cookies.txt -> will download \
gallery of user letodoesart including Mature and Adult submissions
python3 furaffinity-dl.py --submissions -c cookies.txt -> will download your \
submissions notifications \n
DISCLAIMER: It is your own responsibility to check whether batch downloading \
is allowed by FurAffinity terms of service and to abide by them.
""",
)

# Positional arguments (both optional so that --login / --submissions /
# --download can be used without naming a user).
parser.add_argument(
    "username",
    nargs="?",
    help="username of the furaffinity user",
)
parser.add_argument(
    "category",
    nargs="?",
    help="the category to download, gallery/scraps/favorites [default: gallery]",
    default="gallery",
)

# Options that take a value.
parser.add_argument("-c", "--cookies", help="path to a NetScape cookies file")
parser.add_argument(
    "--output",
    "-o",
    dest="output_folder",
    default="Submissions",
    help="set a custom output folder",
)
parser.add_argument(
    "--check",
    action="store_true",
    help="check and download latest submissions of a user",
)
parser.add_argument(
    "-ua",
    "--user-agent",
    dest="user_agent",
    default="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 "
    "Firefox/101.0",
    help="Your browser's useragent, may be required, depending on your luck",
)
parser.add_argument(
    "-sub",
    "--submissions",
    action="store_true",
    help="download your submissions",
)
parser.add_argument(
    "-f",
    "--folder",
    help="full path of the furaffinity gallery folder. for instance 123456/"
    "Folder-Name-Here",
)
# A single value, like every other option here: with nargs="+" a
# user-supplied start page would be parsed into a list (['2']) while the
# default stays the scalar 1, and the list would end up in the page URL.
parser.add_argument("-s", "--start", default=1, help="page number to start from")
parser.add_argument(
    "-S",
    "--stop",
    default=0,
    help="Page number to stop on. Specify the full URL after the username: for "
    "favorites pages (1234567890/next) or for submissions pages: "
    "(new~123456789@48)",
)
# store_false: args.redownload is True by default and turns False when the
# flag is given, i.e. the stored value means "do NOT redownload".
parser.add_argument(
    "-rd",
    "--redownload",
    action="store_false",
    help="Redownload files that have been downloaded already",
)
parser.add_argument(
    "-i",
    "--interval",
    type=int,
    default=0,
    help="delay between downloading pages in seconds [default: 0]",
)
# store_false: rating separation is on unless -r/--rating is passed.
parser.add_argument(
    "-r",
    "--rating",
    action="store_false",
    help="disable rating separation",
)
parser.add_argument(
    "--filter",
    action="store_true",
    dest="submission_filter",
    help="enable submission filter",
)
parser.add_argument(
    "-m",
    "--metadata",
    action="store_true",
    help="enable metadata saving",
)
parser.add_argument(
    "--download",
    help="download a specific submission /view/12345678/",
)
parser.add_argument(
    "-jd",
    "--json-description",
    dest="json_description",
    action="store_true",
    help="download description as a JSON list",
)
parser.add_argument(
    "--login",
    action="store_true",
    help="extract furaffinity cookies directly from your browser",
)

args = parser.parse_args()
# The parsed options are re-exported as plain module attributes so that other
# modules can read them as ``config.<name>``.

# Positional arguments
username = args.username
category = args.category
if username is not None:
    # Several users can be passed as one quoted, space-separated argument;
    # after this point ``username`` is a list of names (or None).
    username = username.split(" ")

# Options carrying a value
cookies = args.cookies
output_folder = args.output_folder
download = args.download
interval = args.interval
user_agent = args.user_agent
start = args.start
stop = args.stop
folder = args.folder

# Boolean flags
login = args.login
check = args.check
submissions = args.submissions
json_description = args.json_description
metadata = args.metadata
# --redownload is a store_false flag: the parsed value is True by default
# (skip files that already exist) and False once the flag is given.
dont_redownload = args.redownload
# --rating is a store_false flag as well: True means "separate by rating".
rating = args.rating
submission_filter = args.submission_filter

# ANSI escape sequences used to colour console output
SUCCESS_COLOR = "\033[1;92m"
WARN_COLOR = "\033[1;33m"
ERROR_COLOR = "\033[1;91m"
END = "\033[0m"

# Globals
BASE_URL = "https://www.furaffinity.net"

312
Modules/functions.py Normal file
View file

@ -0,0 +1,312 @@
import http.cookiejar as cookielib
import json
import os
import re
import browser_cookie3
import requests
from bs4 import BeautifulSoup
from pathvalidate import sanitize_filename
from tqdm import tqdm
import Modules.config as config
# One HTTP session shared by every request made from this module.
session = requests.Session()

# When a cookie file path was given on the command line, load it and attach
# the resulting jar to the session.
if config.cookies is not None:
    cookies = cookielib.MozillaCookieJar(config.cookies)
    cookies.load()
    session.cookies = cookies
class download_complete(Exception):
    """Signal that the current download run should stop.

    Raised by the helpers in this module when there is nothing left to fetch
    (no next page, a system notice, or an already-downloaded file in check
    mode).
    """
def download(path):
    """Download the file of one submission page and, optionally, its metadata.

    Args:
        path: Site-relative submission path such as ``/view/12345678/``. It is
            appended to ``config.BASE_URL``; its second-to-last ``/``-separated
            segment is parsed as the numeric submission id.

    Returns:
        True when the submission was downloaded, filtered out, or skipped
        because the target file already exists; False when the file request
        itself failed (``download_file`` has already reported the error).

    Raises:
        download_complete: when the page shows a system notice, or when
            ``--check`` is active and the target file is already on disk.
    """
    response = session.get(f"{config.BASE_URL}{path}")
    s = BeautifulSoup(response.text, "html.parser")

    # System messages (the handler prints the notice and raises)
    if s.find(class_="notice-message") is not None:
        system_message_handler(s)

    image = s.find(class_="download").find("a").attrs.get("href")
    title = s.find(class_="submission-title").find("p").contents[0]
    title = sanitize_filename(title)
    dsc = s.find(class_="submission-description").text.strip().replace("\r\n", "\n")
    if config.json_description is True:
        # create_metadata() fills this list with the description's strings
        dsc = []

    filename = image.split("/")[-1]
    info = s.find(class_="info")
    info_rows = info.findAll("div")
    data = {
        "id": int(path.split("/")[-2]),
        "filename": filename,
        "author": s.find(class_="submission-id-sub-container")
        .find("a")
        .find("strong")
        .text,
        "date": s.find(class_="popup_date").attrs.get("title"),
        "title": title,
        "description": dsc,
        "url": f"{config.BASE_URL}{path}",
        "tags": [],
        "category": info.find(class_="category-name").text,
        "type": info.find(class_="type-name").text,
        "species": info_rows[2].find("span").text,
        "gender": info_rows[3].find("span").text,
        "views": int(s.find(class_="views").find(class_="font-large").text),
        "favorites": int(s.find(class_="favorites").find(class_="font-large").text),
        "rating": s.find(class_="rating-box").text.strip(),
        "comments": [],
    }

    if config.submission_filter is True and check_filter(title) is True:
        print(
            f'{config.WARN_COLOR}"{title}" was filtered and will not be '
            f'downloaded - {data.get("url")}{config.END}'
        )
        return True

    author = data["author"]
    rating = data["rating"]
    image_url = f"https:{image}"

    # <output_folder>/<author>[/<category> or /<folder>][/<rating>]
    output = f"{config.output_folder}/{author}"
    if config.category != "gallery":
        output = f"{config.output_folder}/{author}/{config.category}"
    if config.folder is not None:
        output = f"{config.output_folder}/{author}/{config.folder}"
    os.makedirs(output, exist_ok=True)

    filename = sanitize_filename(filename)
    output_path = f"{output}/{title} - {filename}"
    if config.rating is True:
        os.makedirs(f"{output}/{rating}", exist_ok=True)
        output_path = f"{output}/{rating}/{title} - {filename}"

    if config.dont_redownload is True and os.path.isfile(output_path):
        if config.check is True:
            # In check mode the first already-present file ends the run.
            print(
                f'{config.SUCCESS_COLOR}Downloaded all recent files of "{author}"{config.END}'
            )
            raise download_complete
        print(
            f'{config.WARN_COLOR}Skipping "{title}" since it\'s already downloaded{config.END}'
        )
        return True

    if not download_file(image_url, output_path, f"{title} - [{rating}]"):
        # The request failed and nothing was written: do not create metadata
        # for, or announce, a file that does not exist.
        return False

    if config.metadata is True:
        create_metadata(output, data, s, title, filename)
    if config.download is not None:
        print(f'{config.SUCCESS_COLOR}File saved as "{output_path}" {config.END}')
    return True
def create_metadata(output, data, s, title, filename):
    """Complete ``data`` with description, tags and comments and save it as JSON.

    Args:
        output: Directory the submission file was saved under (without the
            rating sub-directory).
        data: Submission dictionary built by ``download``; it is updated in
            place ("description" when the JSON description is enabled, "tags",
            "comments") and then dumped.
        s: Parsed submission page.
        title: Sanitized submission title, used in the metadata file name.
        filename: Sanitized file name of the submission, used likewise.

    The JSON file is written to ``<output>[/<rating>]/metadata/`` as
    ``<title> - <filename>.json``.
    """
    if config.rating is True:
        metadata_dir = f'{output}/{data.get("rating")}/metadata'
    else:
        metadata_dir = f"{output}/metadata"
    os.makedirs(metadata_dir, exist_ok=True)
    metadata = f"{metadata_dir}/{title} - {filename}"

    # Extract description as a list of its non-empty text fragments
    if config.json_description is True:
        description_block = s.find("div", class_="submission-description")
        data["description"].extend(description_block.stripped_strings)

    # Extract tags
    try:
        for tag in s.find(class_="tags-row").findAll(class_="tags"):
            data["tags"].append(tag.find("a").text)
    except AttributeError:
        print(f'{config.WARN_COLOR}"{title}" has no tags{config.END}')

    # Extract comments
    for comment in s.findAll(class_="comment_container"):
        parent_link = comment.find(class_="comment-parent")
        if parent_link is None:
            parent_cid = None
        else:
            # presumably the href starts with a 5-character prefix such as
            # "#cid:" in front of the numeric id -- verify against the site
            parent_cid = int(parent_link.attrs.get("href")[5:])

        comment_link = comment.find(class_="comment-link")
        # Comment is deleted or hidden
        if comment_link is None:
            continue

        entry = {
            "cid": int(comment_link.attrs.get("href")[5:]),
            "parent_cid": parent_cid,
            "content": comment.find(class_="comment_text").contents[0].strip(),
            "username": comment.find(class_="comment_username").text,
            "date": comment.find(class_="popup_date").attrs.get("title"),
        }
        data["comments"].append(entry)

    # Write a UTF-8 encoded JSON file for metadata
    with open(f"{metadata}.json", "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
# Characters that may sit between the two keywords of a paired pattern.
_FILTER_GAP = r'[a-z $-/:-?{-~!"^_`\[\]]*'

# (first keyword, second keyword) pairs; each becomes "<first><gap><second>".
_FILTER_PAIRS = (
    ("YCH", "OPEN"),
    ("OPEN", "YCH"),
    ("YCH", "CLOSE"),
    ("CLOSE", "YCH"),
    ("YCH", "ABLE"),
    ("AVAIL", "YCH"),
    ("YCH", "CLONE"),
    ("CLONE", "YCH"),
    ("YCH", "LIM"),
    ("LIM", "YCH"),
    ("COM", "OPEN"),
    ("OPEN", "COM"),
    ("COM", "CLOSE[^r]"),
    ("CLOSE", "COM"),
    ("FIX", "ICE"),
    ("TELEGRAM", "STICK"),
    ("TG", "STICK"),
)

# Alternatives that stand on their own.
_FILTER_SINGLES = (
    r"REM[insder]*\b",
    r"\bREF",
    r"\bSale",
    "auction",
    "multislot",
    "stream",
    "adopt",
)

_FILTER_PATTERN = re.compile(
    "|".join(
        [f"{first}{_FILTER_GAP}{second}" for first, second in _FILTER_PAIRS]
        + list(_FILTER_SINGLES)
    ),
    re.IGNORECASE,
)


def check_filter(title):
    """Tell whether a submission title matches the submission filter.

    The title is searched case-insensitively for any of the filter
    alternatives (keyword pairs such as YCH/OPEN or COM/CLOSE, plus single
    keywords such as "auction" or "stream").

    Args:
        title: Submission title to test.

    Returns:
        True when the title matches, otherwise None.
    """
    if _FILTER_PATTERN.search(title) is not None:
        return True
    return None
def system_message_handler(s):
    """Print the system notice shown on a page and stop the download run.

    Two page layouts are tried: first the ``link-override`` element inside the
    first ``div`` of the ``notice-message`` block, then, if that lookup fails,
    the ``redirect-message`` element of the notice section.

    Args:
        s: Parsed page that contains a ``notice-message`` element.

    Raises:
        download_complete: always, once the message has been printed.
        AttributeError: if neither layout is present on the page.
    """
    try:
        # Parentheses, not braces: braces would build a one-element set and
        # the notice would be printed as "{'...'}".
        message = (
            s.find(class_="notice-message")
            .find("div")
            .find(class_="link-override")
            .text.strip()
        )
    except AttributeError:
        message = (
            s.find("section", class_="aligncenter notice-message")
            .find("div", class_="section-body alignleft")
            .find("div", class_="redirect-message")
            .text.strip()
        )
    print(f"{config.WARN_COLOR}System Message: {message}{config.END}")
    raise download_complete
def download_file(url, fname, desc):
    """Stream ``url`` into the file ``fname`` while showing a progress bar.

    Args:
        url: Address of the file to fetch.
        fname: Path the content is written to (opened in binary write mode).
        desc: Label shown in front of the progress bar.

    Returns:
        True when the content was written, False when the server answered
        with a status other than 200 (an error line is printed in that case).

    Pressing Ctrl+C during the transfer removes the partial file and exits
    the program.
    """
    try:
        # The context manager releases the streamed connection on every exit
        # path, including the early return for a non-200 answer.
        with session.get(url, stream=True) as r:
            if r.status_code != 200:
                print(
                    f"{config.ERROR_COLOR}Got a HTTP {r.status_code} while downloading "
                    f'"{fname}". URL {url} ...skipping{config.END}'
                )
                return False
            total = int(r.headers.get("Content-Length", 0))
            with open(fname, "wb") as file, tqdm(
                desc=desc.ljust(40),
                total=total,
                miniters=100,
                unit="b",
                unit_scale=True,
                unit_divisor=1024,
            ) as bar:
                for data in r.iter_content(chunk_size=1024):
                    size = file.write(data)
                    bar.update(size)
    except KeyboardInterrupt:
        print(f"{config.SUCCESS_COLOR}Finished downloading{config.END}")
        # The interrupt may arrive before the file was created, in which case
        # there is nothing to clean up.
        if os.path.isfile(fname):
            os.remove(fname)
        exit()
    return True
def login(user_agent):
    """Export the browser's FurAffinity cookies to ``cookies.txt`` and exit.

    The cookies of the locally installed browsers are loaded, the site's
    start page is requested with them, and when that page shows a logged-in
    user the ``a`` and ``b`` cookies are written to ``cookies.txt`` in the
    current directory. If the cookies are missing or the page shows no
    logged-in user, an error is printed instead. The program exits in both
    cases.

    Args:
        user_agent: Value for the ``User-Agent`` header of the shared session.
    """
    session.headers.update({"User-Agent": user_agent})

    CJ = browser_cookie3.load()
    response = session.get(config.BASE_URL, cookies=CJ)
    s = BeautifulSoup(response.text, "html.parser")

    try:
        # KeyError: the browser holds no furaffinity cookies at all.
        # AttributeError: the page has no logged-in avatar element.
        fa_cookies = CJ._cookies[".furaffinity.net"]["/"]
        cookie_a = fa_cookies["a"]
        cookie_b = fa_cookies["b"]
        account_username = s.find(class_="loggedin_user_avatar").attrs.get("alt")
    except (AttributeError, KeyError):
        print(
            f"{config.ERROR_COLOR}Error getting cookies, either you need to login into "
            f"furaffinity in your browser, or you can export cookies.txt manually{config.END}"
        )
        exit()

    print(f"{config.SUCCESS_COLOR}Logged in as: {account_username}{config.END}")

    # Netscape cookie files are tab-separated: domain, subdomain flag, path,
    # secure flag, expiry, name, value. MozillaCookieJar rejects other
    # separators when the file is loaded back through "-c cookies.txt".
    cookie_lines = [
        "# Netscape HTTP Cookie File",
        "# http://curl.haxx.se/rfc/cookie_spec.html",
        "# This is a generated file! Do not edit.",
    ]
    for name, cookie in (("a", cookie_a), ("b", cookie_b)):
        cookie_lines.append(
            "\t".join(
                (
                    ".furaffinity.net",
                    "TRUE",
                    "/",
                    "TRUE",
                    str(cookie.expires),
                    name,
                    str(cookie.value),
                )
            )
        )
    with open("cookies.txt", "w") as file:
        file.write("\n".join(cookie_lines))

    print(
        f"{config.SUCCESS_COLOR}cookies saved successfully, now you can provide them "
        f'by using "-c cookies.txt"{config.END}'
    )
    exit()
def next_button(page_url):
    """Fetch ``page_url`` and work out the identifier of the following page.

    Three pagination schemes are handled, chosen from the configuration:
    submission notifications (``config.submissions``), favorites
    (``config.category == "favorites"``) and everything else.

    Args:
        page_url: URL of the page that is currently being processed.

    Returns:
        The page identifier to append to the download URL next.

    Raises:
        download_complete: when the page has no usable "next" control.
    """
    response = session.get(page_url)
    soup = BeautifulSoup(response.text, "html.parser")

    if config.submissions is True:
        # unlike galleries that are sequentially numbered, submissions use a different scheme.
        # the "page_num" is instead: new~[set of numbers]@(12 or 48 or 72) if sorting by new
        try:
            more_href = soup.find("a", class_="button standard more").attrs.get("href")
        except AttributeError:
            try:
                more_href = soup.find(
                    "a", class_="button standard more-half"
                ).attrs.get("href")
            except AttributeError as e:
                print(f"{config.WARN_COLOR}Unable to find next button{config.END}")
                raise download_complete from e
        page_num = more_href.split("/")[-2]
    elif config.category == "favorites":
        page_num = fav_next_button(soup)
    else:
        # The "Next" button sits inside a form; the form's action carries the
        # number of the following page.
        button = soup.find("button", class_="button standard", text="Next")
        if button is None or button.parent is None:
            print(f"{config.WARN_COLOR}Unable to find next button{config.END}")
            raise download_complete
        page_num = button.parent.attrs["action"].split("/")[-2]

    print(f"Downloading page {page_num} - {page_url}")
    return page_num
def fav_next_button(s):
    """Return the identifier of the next favorites page.

    unlike galleries that are sequentially numbered, favorites use a different
    scheme: the "page_num" is instead ``[set of numbers]/next`` (the trailing
    ``/next`` is required).

    Args:
        s: Parsed favorites page.

    Returns:
        The first run of digits in the "Next" link's href, followed by
        ``/next``.

    Raises:
        download_complete: when the page has no "Next" link or the link
            contains no digits.
    """
    link = s.find("a", class_="button standard right", text="Next")
    if link is None:
        print(f"{config.WARN_COLOR}Unable to find next button{config.END}")
        raise download_complete

    digits = re.search(r"\d+", link.attrs["href"])
    if digits is None:
        print(f"{config.WARN_COLOR}Failed to parse next favorite link{config.END}")
        raise download_complete

    return digits.group(0) + "/next"

View file

@ -24,9 +24,8 @@ When downloading a folder make sure to put everything after **/folder/**, for ex
```help
usage: furaffinity-dl.py [-h] [-sub] [-f FOLDER [FOLDER ...]] [-c COOKIES [COOKIES ...]] [-ua USER_AGENT [USER_AGENT ...]]
[--start START [START ...]] [--stop STOP [STOP ...]] [--redownload] [--interval INTERVAL [INTERVAL ...]] [--rating]
[--filter] [--metadata] [--download DOWNLOAD] [-jd] [--login] [--check] [--output]
usage: furaffinity-dl.py [-h] [-c COOKIES] [--output OUTPUT_FOLDER] [--check] [-ua USER_AGENT] [-sub] [-f FOLDER] [-s START [START ...]]
[-S STOP] [-rd] [-i INTERVAL] [-r] [--filter] [-m] [--download DOWNLOAD] [-jd] [--login]
[username] [category]
Downloads the entire gallery/scraps/folder/favorites of a furaffinity user, or your submissions notifications
@ -37,34 +36,34 @@ positional arguments:
options:
-h, --help show this help message and exit
-sub, --submissions download your submissions
-f FOLDER [FOLDER ...], --folder FOLDER [FOLDER ...]
full path of the furaffinity gallery folder. for instance 123456/Folder-Name-Here
-c COOKIES [COOKIES ...], --cookies COOKIES [COOKIES ...]
-c COOKIES, --cookies COOKIES
path to a NetScape cookies file
-ua USER_AGENT [USER_AGENT ...], --user-agent USER_AGENT [USER_AGENT ...]
--output OUTPUT_FOLDER, -o OUTPUT_FOLDER
set a custom output folder
--check check and download latest submissions of a user
-ua USER_AGENT, --user-agent USER_AGENT
Your browser's useragent, may be required, depending on your luck
--start START [START ...], -s START [START ...]
-sub, --submissions download your submissions
-f FOLDER, --folder FOLDER
full path of the furaffinity gallery folder. for instance 123456/Folder-Name-Here
-s START [START ...], --start START [START ...]
page number to start from
--stop STOP [STOP ...], -S STOP [STOP ...]
Page number to stop on. Specify the full URL after the username: for favorites pages (1234567890/next) or for submissions pages: (new~123456789@48)
--redownload, -rd Redownload files that have been downloaded already
--interval INTERVAL [INTERVAL ...], -i INTERVAL [INTERVAL ...]
-S STOP, --stop STOP Page number to stop on. Specify the full URL after the username: for favorites pages (1234567890/next) or for submissions pages: (new~123456789@48)
-rd, --redownload Redownload files that have been downloaded already
-i INTERVAL, --interval INTERVAL
delay between downloading pages in seconds [default: 0]
--rating, -r disable rating separation
-r, --rating disable rating separation
--filter enable submission filter
--metadata, -m enable metadata saving
-m, --metadata enable metadata saving
--download DOWNLOAD download a specific submission /view/12345678/
-jd, --json-description
download description as a JSON list
--login extract furaffinity cookies directly from your browser
--check check and download latest submissions of a user
--output, -o set a custom output folder
Examples:
python3 furaffinity-dl.py koul -> will download gallery of user koul
python3 furaffinity-dl.py koul scraps -> will download scraps of user koul
python3 furaffinity-dl.py mylafox favorites -> will download favorites of user mylafox
python3 furaffinity-dl.py mylafox favorites -> will download favorites of user mylafox
You can also download a several users in one go like this:
python3 furaffinity-dl.py "koul radiquum mylafox" -> will download gallery of users koul -> radiquum -> mylafox
@ -72,7 +71,7 @@ You can also provide a file with user names that are separated by a new line
You can also log in to FurAffinity in a web browser and load cookies to download age restricted content or submissions:
python3 furaffinity-dl.py letodoesart -c cookies.txt -> will download gallery of user letodoesart including Mature and Adult submissions
python3 furaffinity-dl.py --submissions -c cookies.txt -> will download your submissions notifications
python3 furaffinity-dl.py --submissions -c cookies.txt -> will download your submissions notifications
DISCLAIMER: It is your own responsibility to check whether batch downloading is allowed by FurAffinity terms of service and to abide by them.

View file

@ -1,437 +1,38 @@
#!/usr/bin/python3
import argparse
import contextlib
import http.cookiejar as cookielib
import json
import os
import re
from time import sleep
import requests
from bs4 import BeautifulSoup
from pathvalidate import sanitize_filename
from tqdm import tqdm
# COLORS
WHITE = "\033[1;37m"
RED = "\033[1;91m"
GREEN = "\033[1;92m"
YELLOW = "\033[1;33m"
END = "\033[0m"
# Argument parsing
parser = argparse.ArgumentParser(
formatter_class=argparse.RawTextHelpFormatter,
description="Downloads the entire gallery/scraps/folder/favorites \
of a furaffinity user, or your submissions notifications",
epilog="""
Examples:
python3 furaffinity-dl.py koul -> will download gallery of user koul
python3 furaffinity-dl.py koul scraps -> will download scraps of user koul
python3 furaffinity-dl.py mylafox favorites -> will download favorites \
of user mylafox \n
You can also download a several users in one go like this:
python3 furaffinity-dl.py "koul radiquum mylafox" \
-> will download gallery of users koul -> radiquum -> mylafox
You can also provide a file with user names that are separated by a new line\n
You can also log in to FurAffinity in a web browser and load cookies to \
download age restricted content or submissions:
python3 furaffinity-dl.py letodoesart -c cookies.txt -> will download \
gallery of user letodoesart including Mature and Adult submissions
python3 furaffinity-dl.py --submissions -c cookies.txt -> will download your \
submissions notifications \n
DISCLAIMER: It is your own responsibility to check whether batch downloading \
is allowed by FurAffinity terms of service and to abide by them.
""",
)
parser.add_argument(
"username",
nargs="?",
help="username of the furaffinity \
user",
)
parser.add_argument(
"category",
nargs="?",
help="the category to download, gallery/scraps/favorites \
[default: gallery]",
default="gallery",
)
parser.add_argument(
"-sub",
"--submissions",
action="store_true",
help="download your \
submissions",
)
parser.add_argument(
"-f",
"--folder",
nargs="+",
help="full path of the furaffinity gallery folder. for instance 123456/\
Folder-Name-Here",
)
parser.add_argument(
"-c", "--cookies", nargs="+", help="path to a NetScape cookies file"
)
parser.add_argument(
"-ua",
"--user-agent",
dest="user_agent",
nargs="+",
default=[
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 \
Firefox/101.0"
],
help="Your browser's useragent, may be required, depending on your luck",
)
parser.add_argument(
"--start", "-s", default=[1], help="page number to start from", nargs="+"
)
parser.add_argument(
"--stop",
"-S",
default=[0],
nargs="+",
help="Page number to stop on. Specify the full URL after the username: for \
favorites pages (1234567890/next) or for submissions pages: \
(new~123456789@48)",
)
parser.add_argument(
"--redownload",
"-rd",
dest="dont_redownload",
action="store_false",
help="Redownload files that have been downloaded already",
)
parser.add_argument(
"--interval",
"-i",
type=int,
default=[0],
nargs="+",
help="delay between downloading pages in seconds [default: 0]",
)
parser.add_argument(
"--rating",
"-r",
action="store_false",
help="disable rating separation",
)
parser.add_argument(
"--filter",
action="store_true",
help="enable submission filter",
)
parser.add_argument(
"--metadata",
"-m",
action="store_true",
help="enable metadata saving",
)
parser.add_argument(
"--download",
help="download a specific submission /view/12345678/",
)
parser.add_argument(
"-jd",
"--json-description",
dest="json_description",
action="store_true",
help="download description as a JSON list",
)
parser.add_argument(
"--login",
action="store_true",
help="extract furaffinity cookies directly from your browser",
)
parser.add_argument(
"--check",
action="store_true",
help="check and download latest submissions of a user",
)
parser.add_argument(
"--output",
"-o",
dest="output_folder",
default="Submissions",
help="set a custom output folder",
)
args = parser.parse_args()
BASE_URL = "https://www.furaffinity.net"
if args.username != None:
username = args.username.split(" ")
category = args.category
import Modules.config as config
from Modules.functions import download_complete
from Modules.functions import download
from Modules.functions import login
from Modules.functions import next_button
from Modules.functions import system_message_handler
# get session
session = requests.session()
session.headers.update({"User-Agent": args.user_agent[0]})
session.headers.update({"User-Agent": config.user_agent})
if args.cookies is not None: # add cookies if present
cookies = cookielib.MozillaCookieJar(args.cookies[0])
if config.cookies is not None: # add cookies if present
cookies = cookielib.MozillaCookieJar(config.cookies)
cookies.load()
session.cookies = cookies
# Functions
def download_file(url, fname, desc):
try:
r = session.get(url, stream=True)
if r.status_code != 200:
print(
f'{RED}Got a HTTP {r.status_code} while downloading \
"{fname}". URL {url} ...skipping{END}'
)
return False
total = int(r.headers.get("Content-Length", 0))
with open(fname, "wb") as file, tqdm(
desc=desc.ljust(40),
total=total,
miniters=100,
unit="b",
unit_scale=True,
unit_divisor=1024,
) as bar:
for data in r.iter_content(chunk_size=1024):
size = file.write(data)
bar.update(size)
except KeyboardInterrupt:
print(f"{GREEN}Finished downloading{END}")
os.remove(fname)
exit()
return True
def system_message_handler(s):
try:
message = {
s.find(class_="notice-message")
.find("div").find(class_="link-override").text.strip()
}
except AttributeError:
message = (
s.find("section", class_="aligncenter notice-message")
.find("div", class_="section-body alignleft")
.find("div", class_="redirect-message")
.text.strip()
)
print(f"{YELLOW}System Message: {message}{END}")
raise System_Message
def check_filter(title):
search = 'YCH[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE\
|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*ABLE\
|AVAIL[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLONE\
|CLONE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*LIM\
|LIM[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
|COM[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*COM\
|COM[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE[^r]\
|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*COM\
|FIX[a-z $-/:-?{-~!"^_`\\[\\]]*ICE\
|TELEGRAM[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
|TG[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
|REM[insder]*\\b\
|\\bREF|\\bSale|auction|multislot|stream|adopt'
match = re.search(
search,
title,
re.IGNORECASE,
)
if match is not None and title == match.string:
return True
return None
def create_metadata(output, data, s, title, filename):
os.makedirs(f'{output}/metadata', exist_ok=True)
metadata = f"{output}/metadata/{title} - {filename}"
if args.rating is True:
os.makedirs(f'{output}/{data.get("rating")}/metadata', exist_ok=True)
metadata = f'{output}/{data.get("rating")}/metadata/{title} - {filename}'
# Extract description as list
if args.json_description is True:
for desc in s.find("div", class_="submission-description").stripped_strings:
data["description"].append(desc)
# Extact tags
try:
for tag in s.find(class_="tags-row").findAll(class_="tags"):
data["tags"].append(tag.find("a").text)
except AttributeError:
print(f'{YELLOW}"{title}" has no tags{END}')
# Extract comments
for comment in s.findAll(class_="comment_container"):
temp_ele = comment.find(class_="comment-parent")
parent_cid = None if temp_ele is None else int(temp_ele.attrs.get("href")[5:])
# Comment is deleted or hidden
if comment.find(class_="comment-link") is None:
continue
data["comments"].append(
{
"cid": int(comment.find(class_="comment-link").attrs.get("href")[5:]),
"parent_cid": parent_cid,
"content": comment.find(class_="comment_text").contents[0].strip(),
"username": comment.find(class_="comment_username").text,
"date": comment.find(class_="popup_date").attrs.get("title"),
}
)
# Write a UTF-8 encoded JSON file for metadata
with open(f"{metadata}.json", "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=4)
def login():
import browser_cookie3
CJ = browser_cookie3.load()
response = session.get(BASE_URL, cookies=CJ)
FA_COOKIES = CJ._cookies[".furaffinity.net"]["/"]
cookie_a = FA_COOKIES["a"]
cookie_b = FA_COOKIES["b"]
s = BeautifulSoup(response.text, "html.parser")
try:
s.find(class_="loggedin_user_avatar")
account_username = s.find(class_="loggedin_user_avatar").attrs.get("alt")
print(f"{GREEN}Logged in as: {account_username}{END}")
with open("cookies.txt", "w") as file:
file.write(
f"""# Netscape HTTP Cookie File
# http://curl.haxx.se/rfc/cookie_spec.html
# This is a generated file! Do not edit.
.furaffinity.net TRUE / TRUE {cookie_a.expires} a {cookie_a.value}
.furaffinity.net TRUE / TRUE {cookie_b.expires} b {cookie_b.value}"""
)
print(
f'{GREEN}cookies saved successfully, now you can provide them \
by using "-c cookies.txt"{END}'
)
except AttributeError:
print(
f"{RED}Error getting cookies, either you need to login into \
furaffinity in your browser, or you can export cookies.txt manually{END}"
)
exit()
# File downloading
class Check_Complete(Exception):
pass
class System_Message(Exception):
pass
def download(path):
response = session.get(f"{BASE_URL}{path}")
s = BeautifulSoup(response.text, "html.parser")
# System messages
if s.find(class_="notice-message") is not None:
system_message_handler(s)
image = s.find(class_="download").find("a").attrs.get("href")
title = s.find(class_="submission-title").find("p").contents[0]
title = sanitize_filename(title)
dsc = s.find(class_="submission-description").text.strip().replace("\r\n", "\n")
if args.json_description is True:
dsc = []
filename = image.split("/")[-1:][0]
data = {
"id": int(path.split("/")[-2:-1][0]),
"filename": filename,
"author": s.find(class_="submission-id-sub-container")
.find("a")
.find("strong")
.text,
"date": s.find(class_="popup_date").attrs.get("title"),
"title": title,
"description": dsc,
"url": f"{BASE_URL}{path}",
"tags": [],
"category": s.find(class_="info").find(class_="category-name").text,
"type": s.find(class_="info").find(class_="type-name").text,
"species": s.find(class_="info").findAll("div")[2].find("span").text,
"gender": s.find(class_="info").findAll("div")[3].find("span").text,
"views": int(s.find(class_="views").find(class_="font-large").text),
"favorites": int(s.find(class_="favorites").find(class_="font-large").text),
"rating": s.find(class_="rating-box").text.strip(),
"comments": [],
}
if args.filter is True and check_filter(title) is True:
print(f'{YELLOW}"{title}" was filtered and will not be \
downloaded - {data.get("url")}{END}')
return True
image_url = f"https:{image}"
output = f"{args.output_folder}/{data.get('author')}"
if category != "gallery":
output = f"{args.output_folder}/{data.get('author')}/{category}"
if args.folder is not None:
output = f"{args.output_folder}/{data.get('author')}/{folder[1]}"
os.makedirs(output, exist_ok=True)
filename = sanitize_filename(filename)
global output_path
output_path = f"{output}/{title} - {filename}"
if args.rating is True:
os.makedirs(f'{output}/{data.get("rating")}', exist_ok=True)
output_path = f'{output}/{data.get("rating")}/{title} - {filename}'
if args.dont_redownload is True and os.path.isfile(output_path):
if args.check is True:
print(f"{GREEN} Downloaded all recent files of \"{data.get('author')}\"")
raise Check_Complete
print(f'{YELLOW}Skipping "{title}" since it\'s already downloaded{END}')
return True
else:
download_file(
image_url,
output_path,
f'{title} - \
[{data.get("rating")}]',
)
if args.metadata is True:
create_metadata(output, data, s, title, filename)
return True
# Main function
def main():
page_end = args.stop[0]
page_num = args.start[0]
# download loop
with contextlib.suppress(Check_Complete, System_Message):
page_num = config.start
with contextlib.suppress(download_complete):
while True:
if page_end == page_num:
print(f"{YELLOW}Reached page {page_end}, stopping.{END}")
if config.stop == page_num:
print(
f'{config.WARN_COLOR}Reached page "{config.stop}", stopping.{config.END}'
)
break
page_url = f"{download_url}/{page_num}"
@ -444,99 +45,79 @@ def main():
# End of gallery
if s.find(id="no-images") is not None:
print(f"{GREEN}End of gallery{END}")
print(f"{config.SUCCESS_COLOR}End of gallery{config.END}")
break
# Download all images on the page
for img in s.findAll("figure"):
download(img.find("a").attrs.get("href"))
sleep(args.interval[0])
sleep(config.interval)
# Download submissions
if args.submissions is True:
try:
next_button = s.find("a", class_="button standard more").attrs.get(
"href"
)
except AttributeError:
try:
next_button = s.find(
"a", class_="button standard more-half"
).attrs.get("href")
except AttributeError:
print(f"{YELLOW}Unable to find next button{END}")
break
# unlike galleries that are sequentially numbered, submissions use a different scheme.
# the "page_num" is instead: new~[set of numbers]@(12 or 48 or 72) if sorting by new
page_num = next_button.split("/")[-2]
page_url = f"{BASE_URL}{next_button}"
elif args.category != "favorites":
next_button = s.find("button", class_="button standard", text="Next")
if next_button is None or next_button.parent is None:
print(f"{YELLOW}Unable to find next button{END}")
break
page_num = next_button.parent.attrs["action"].split("/")[-2]
else:
next_button = s.find("a", class_="button standard right", text="Next")
if next_button is None:
print(f"{YELLOW}Unable to find next button{END}")
break
# unlike galleries that are sequentially numbered, favorites use a different scheme.
# the "page_num" is instead: [set of numbers]/next (the trailing /next is required)
next_page_link = next_button.attrs["href"]
next_fav_num = re.search(r"\d+", next_page_link)
if next_fav_num is None:
print(f"{YELLOW}Failed to parse next favorite link{END}")
break
page_num = f"{next_fav_num[0]}/next"
print(f"{WHITE}Downloading page {page_num} - {page_url} {END}")
print(
f"{GREEN}Finished \
downloading{END}"
)
page_num = next_button(page_url)
if __name__ == "__main__":
if args.login is True:
login()
if config.login is True:
login(config.user_agent)
try:
response = session.get(config.BASE_URL)
except ConnectionError:
print(f"{config.ERROR_COLOR}Connection failed{config.END}")
exit()
except KeyboardInterrupt:
print(f"{config.WARN_COLOR}Aborted by user{config.END}")
exit()
response = session.get(BASE_URL)
s = BeautifulSoup(response.text, "html.parser")
if s.find(class_="loggedin_user_avatar") is not None:
account_username = s.find(class_="loggedin_user_avatar").attrs.get("alt")
print(f'{GREEN}Logged in as "{account_username}"{END}')
print(f'{config.SUCCESS_COLOR}Logged in as "{account_username}"{config.END}')
else:
print(f"{YELLOW}Not logged in, NSFW content is inaccessible{END}")
print(
f"{config.WARN_COLOR}Not logged in, NSFW content is inaccessible{config.END}"
)
if args.download is not None:
download(args.download)
print(f'{GREEN}File saved as "{output_path}" {END}')
if config.download is not None:
download(config.download)
exit()
if args.submissions is True:
download_url = f"{BASE_URL}/msg/submissions"
main()
exit()
if args.folder is not None:
folder = args.folder[0].split("/")
download_url = f"{BASE_URL}/gallery/{username[0]}/folder/{args.folder[0]}"
if config.submissions is True:
download_url = f"{config.BASE_URL}/msg/submissions"
main()
print(
f"{config.SUCCESS_COLOR}Finished \
downloading submissions{config.END}"
)
exit()
if os.path.exists(username[0]):
data = open(username[0]).read()
username = filter(None, data.split("\n"))
for username in username:
print(f'{GREEN}Now downloading "{username}"{END}')
download_url = f"{BASE_URL}/{category}/{username}"
if config.folder is not None:
folder = config.folder.split("/")
download_url = (
f"{config.BASE_URL}/gallery/{config.username}/folder/{config.folder[1]}"
)
main()
print(
f'{config.SUCCESS_COLOR}Finished \
downloading "{config.folder[1]}"{config.END}'
)
exit()
if config.category not in ["gallery", "scraps", "favorites"]:
print(
f"{config.ERROR_COLOR}Please enter a valid category [gallery/scraps/favorites] {config.END}"
)
exit()
if os.path.exists(config.username[0]):
data = open(config.username[0]).read()
config.username = filter(None, data.split("\n"))
for username in config.username:
print(f'{config.SUCCESS_COLOR}Now downloading "{username}"{config.END}')
download_url = f"{config.BASE_URL}/{config.category}/{username}"
main()
print(
f'{config.SUCCESS_COLOR}Finished \
downloading "{username}"{config.END}'
)

View file

@ -3,3 +3,4 @@ requests
tqdm
browser-cookie3
pathvalidate
pre-commit