divided into multiple files

Kentai Radiquum 2022-07-06 16:54:34 +05:00
parent c51c87d39b
commit 54478d3ed3
No known key found for this signature in database
GPG key ID: CB1FC16C710DB347
6 changed files with 572 additions and 509 deletions

.gitignore (2 changes)

@@ -14,4 +14,4 @@ Submissions/
 # vscode stuff
 .vscode
 list.txt
-dl_dev.py
+__pycache__

Modules/config.py (new file, 170 lines)

@@ -0,0 +1,170 @@

import argparse

parser = argparse.ArgumentParser(
    formatter_class=argparse.RawTextHelpFormatter,
    description="Downloads the entire gallery/scraps/folder/favorites \
of a furaffinity user, or your submissions notifications",
    epilog="""
Examples:
python3 furaffinity-dl.py koul -> will download gallery of user koul
python3 furaffinity-dl.py koul scraps -> will download scraps of user koul
python3 furaffinity-dl.py mylafox favorites -> will download favorites \
of user mylafox \n
You can also download several users in one go like this:
python3 furaffinity-dl.py "koul radiquum mylafox" \
-> will download gallery of users koul -> radiquum -> mylafox
You can also provide a file with user names that are separated by a new line\n
You can also log in to FurAffinity in a web browser and load cookies to \
download age restricted content or submissions:
python3 furaffinity-dl.py letodoesart -c cookies.txt -> will download \
gallery of user letodoesart including Mature and Adult submissions
python3 furaffinity-dl.py --submissions -c cookies.txt -> will download your \
submissions notifications \n
DISCLAIMER: It is your own responsibility to check whether batch downloading \
is allowed by FurAffinity terms of service and to abide by them.
""",
)
parser.add_argument(
    "username",
    nargs="?",
    help="username of the furaffinity \
user",
)
parser.add_argument(
    "category",
    nargs="?",
    help="the category to download, gallery/scraps/favorites \
[default: gallery]",
    default="gallery",
)
parser.add_argument("-c", "--cookies", help="path to a NetScape cookies file")
parser.add_argument(
    "--output",
    "-o",
    dest="output_folder",
    default="Submissions",
    help="set a custom output folder",
)
parser.add_argument(
    "--check",
    action="store_true",
    help="check and download latest submissions of a user",
)
parser.add_argument(
    "-ua",
    "--user-agent",
    dest="user_agent",
    default="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 \
Firefox/101.0",
    help="Your browser's useragent, may be required, depending on your luck",
)
parser.add_argument(
    "-sub",
    "--submissions",
    action="store_true",
    help="download your \
submissions",
)
parser.add_argument(
    "-f",
    "--folder",
    help="full path of the furaffinity gallery folder. for instance 123456/\
Folder-Name-Here",
)
parser.add_argument(
    "-s", "--start", default=1, help="page number to start from", nargs="+"
)
parser.add_argument(
    "-S",
    "--stop",
    default=0,
    help="Page number to stop on. Specify the full URL after the username: for \
favorites pages (1234567890/next) or for submissions pages: \
(new~123456789@48)",
)
parser.add_argument(
    "-rd",
    "--redownload",
    action="store_false",
    help="Redownload files that have been downloaded already",
)
parser.add_argument(
    "-i",
    "--interval",
    type=int,
    default=0,
    help="delay between downloading pages in seconds [default: 0]",
)
parser.add_argument(
    "-r",
    "--rating",
    action="store_false",
    help="disable rating separation",
)
parser.add_argument(
    "--filter",
    action="store_true",
    dest="submission_filter",
    help="enable submission filter",
)
parser.add_argument(
    "-m",
    "--metadata",
    action="store_true",
    help="enable metadata saving",
)
parser.add_argument(
    "--download",
    help="download a specific submission /view/12345678/",
)
parser.add_argument(
    "-jd",
    "--json-description",
    dest="json_description",
    action="store_true",
    help="download description as a JSON list",
)
parser.add_argument(
    "--login",
    action="store_true",
    help="extract furaffinity cookies directly from your browser",
)

args = parser.parse_args()

# positional
username = args.username
category = args.category
if username is not None:
    username = username.split(" ")

# Custom input
cookies = args.cookies
output_folder = args.output_folder
download = args.download
interval = args.interval
user_agent = args.user_agent
start = args.start
stop = args.stop
folder = args.folder

# True / False
login = args.login
check = args.check
submissions = args.submissions
json_description = args.json_description
metadata = args.metadata
dont_redownload = args.redownload
rating = args.rating
submission_filter = args.submission_filter

# Colors
SUCCESS_COLOR = "\033[1;92m"
WARN_COLOR = "\033[1;33m"
ERROR_COLOR = "\033[1;91m"
END = "\033[0m"

# Globals
BASE_URL = "https://www.furaffinity.net"
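Because the argparse block runs at import time, every module that does `import Modules.config as config` shares one set of parsed values. A minimal sketch of that pattern (illustrative only, not part of the commit):

```python
# Sketch: mirrors how Modules/functions.py consumes Modules/config.py.
# Invocation assumed: python3 sketch.py "koul radiquum" scraps
import Modules.config as config

print(config.BASE_URL)   # constant: https://www.furaffinity.net
print(config.category)   # "scraps" here; defaults to "gallery"
if config.username is not None:
    # the positional username was split on spaces into a list of users
    for name in config.username:
        print(f'{config.SUCCESS_COLOR}queued "{name}"{config.END}')
```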

Modules/functions.py (new file, 312 lines)

@@ -0,0 +1,312 @@

import http.cookiejar as cookielib
import json
import os
import re

import browser_cookie3
import requests
from bs4 import BeautifulSoup
from pathvalidate import sanitize_filename
from tqdm import tqdm

import Modules.config as config

session = requests.session()
if config.cookies is not None:  # add cookies if present
    cookies = cookielib.MozillaCookieJar(config.cookies)
    cookies.load()
    session.cookies = cookies


class download_complete(Exception):
    pass


def download(path):
    response = session.get(f"{config.BASE_URL}{path}")
    s = BeautifulSoup(response.text, "html.parser")
    # System messages
    if s.find(class_="notice-message") is not None:
        system_message_handler(s)
    image = s.find(class_="download").find("a").attrs.get("href")
    title = s.find(class_="submission-title").find("p").contents[0]
    title = sanitize_filename(title)
    dsc = s.find(class_="submission-description").text.strip().replace("\r\n", "\n")
    if config.json_description is True:
        dsc = []
    filename = image.split("/")[-1:][0]
    data = {
        "id": int(path.split("/")[-2:-1][0]),
        "filename": filename,
        "author": s.find(class_="submission-id-sub-container")
        .find("a")
        .find("strong")
        .text,
        "date": s.find(class_="popup_date").attrs.get("title"),
        "title": title,
        "description": dsc,
        "url": f"{config.BASE_URL}{path}",
        "tags": [],
        "category": s.find(class_="info").find(class_="category-name").text,
        "type": s.find(class_="info").find(class_="type-name").text,
        "species": s.find(class_="info").findAll("div")[2].find("span").text,
        "gender": s.find(class_="info").findAll("div")[3].find("span").text,
        "views": int(s.find(class_="views").find(class_="font-large").text),
        "favorites": int(s.find(class_="favorites").find(class_="font-large").text),
        "rating": s.find(class_="rating-box").text.strip(),
        "comments": [],
    }
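    # `data` now holds everything scraped from the submission page; with
    # -m/--metadata, create_metadata() below dumps it to "<title> - <filename>.json".
    # An illustrative (made-up) slice of the result:
    #   {"id": 12345678, "filename": "12345678.koul_art.png",
    #    "rating": "General", "tags": ["tag1", "tag2"], ...}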
    if config.submission_filter is True and check_filter(title) is True:
        print(
            f'{config.WARN_COLOR}"{title}" was filtered and will not be \
downloaded - {data.get("url")}{config.END}'
        )
        return True
    image_url = f"https:{image}"
    output = f"{config.output_folder}/{data.get('author')}"
    if config.category != "gallery":
        output = f"{config.output_folder}/{data.get('author')}/{config.category}"
    if config.folder is not None:
        output = f"{config.output_folder}/{data.get('author')}/{config.folder}"
    os.makedirs(output, exist_ok=True)
    filename = sanitize_filename(filename)
    output_path = f"{output}/{title} - {filename}"
    if config.rating is True:
        os.makedirs(f'{output}/{data.get("rating")}', exist_ok=True)
        output_path = f'{output}/{data.get("rating")}/{title} - {filename}'
    if config.dont_redownload is True and os.path.isfile(output_path):
        if config.check is True:
            print(
                f"{config.SUCCESS_COLOR}Downloaded all recent files of \"{data.get('author')}\"{config.END}"
            )
            raise download_complete
        print(
            f'{config.WARN_COLOR}Skipping "{title}" since it\'s already downloaded{config.END}'
        )
        return True
    else:
        download_file(
            image_url,
            output_path,
            f'{title} - \
[{data.get("rating")}]',
        )
    if config.metadata is True:
        create_metadata(output, data, s, title, filename)
    if config.download is not None:
        print(f'{config.SUCCESS_COLOR}File saved as "{output_path}" {config.END}')
    return True


def create_metadata(output, data, s, title, filename):
    if config.rating is True:
        os.makedirs(f'{output}/{data.get("rating")}/metadata', exist_ok=True)
        metadata = f'{output}/{data.get("rating")}/metadata/{title} - {filename}'
    else:
        os.makedirs(f"{output}/metadata", exist_ok=True)
        metadata = f"{output}/metadata/{title} - {filename}"
    # Extract description as list
    if config.json_description is True:
        for desc in s.find("div", class_="submission-description").stripped_strings:
            data["description"].append(desc)
    # Extract tags
    try:
        for tag in s.find(class_="tags-row").findAll(class_="tags"):
            data["tags"].append(tag.find("a").text)
    except AttributeError:
        print(f'{config.WARN_COLOR}"{title}" has no tags{config.END}')
    # Extract comments
    for comment in s.findAll(class_="comment_container"):
        temp_ele = comment.find(class_="comment-parent")
        parent_cid = None if temp_ele is None else int(temp_ele.attrs.get("href")[5:])
        # Comment is deleted or hidden
        if comment.find(class_="comment-link") is None:
            continue
        data["comments"].append(
            {
                "cid": int(comment.find(class_="comment-link").attrs.get("href")[5:]),
                "parent_cid": parent_cid,
                "content": comment.find(class_="comment_text").contents[0].strip(),
                "username": comment.find(class_="comment_username").text,
                "date": comment.find(class_="popup_date").attrs.get("title"),
            }
        )
    # Write a UTF-8 encoded JSON file for metadata
    with open(f"{metadata}.json", "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)


def check_filter(title):
    search = 'YCH[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE\
|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*ABLE\
|AVAIL[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLONE\
|CLONE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*LIM\
|LIM[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
|COM[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*COM\
|COM[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE[^r]\
|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*COM\
|FIX[a-z $-/:-?{-~!"^_`\\[\\]]*ICE\
|TELEGRAM[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
|TG[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
|REM[insder]*\\b\
|\\bREF|\\bSale|auction|multislot|stream|adopt'
    match = re.search(
        search,
        title,
        re.IGNORECASE,
    )
    if match is not None and title == match.string:
        return True
    return None
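# Illustrative check_filter() results (example titles are assumptions, not from the source):
#   check_filter("YCH OPEN - 2 slots")  -> True  (matches the YCH...OPEN branch above)
#   check_filter("morning doodle")      -> None  (no branch matches, so it downloads)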
def system_message_handler(s):
    try:
        message = {
            s.find(class_="notice-message")
            .find("div")
            .find(class_="link-override")
            .text.strip()
        }
    except AttributeError:
        message = (
            s.find("section", class_="aligncenter notice-message")
            .find("div", class_="section-body alignleft")
            .find("div", class_="redirect-message")
            .text.strip()
        )
    print(f"{config.WARN_COLOR}System Message: {message}{config.END}")
    raise download_complete


def download_file(url, fname, desc):
    try:
        r = session.get(url, stream=True)
        if r.status_code != 200:
            print(
                f'{config.ERROR_COLOR}Got a HTTP {r.status_code} while downloading \
"{fname}". URL {url} ...skipping{config.END}'
            )
            return False
        total = int(r.headers.get("Content-Length", 0))
        with open(fname, "wb") as file, tqdm(
            desc=desc.ljust(40),
            total=total,
            miniters=100,
            unit="b",
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            for data in r.iter_content(chunk_size=1024):
                size = file.write(data)
                bar.update(size)
    except KeyboardInterrupt:
        print(f"{config.SUCCESS_COLOR}Finished downloading{config.END}")
        os.remove(fname)
        exit()
    return True


def login(user_agent):
    session.headers.update({"User-Agent": user_agent})
    CJ = browser_cookie3.load()
    response = session.get(config.BASE_URL, cookies=CJ)
    FA_COOKIES = CJ._cookies[".furaffinity.net"]["/"]
    cookie_a = FA_COOKIES["a"]
    cookie_b = FA_COOKIES["b"]
    s = BeautifulSoup(response.text, "html.parser")
    try:
        s.find(class_="loggedin_user_avatar")
        account_username = s.find(class_="loggedin_user_avatar").attrs.get("alt")
        print(f"{config.SUCCESS_COLOR}Logged in as: {account_username}{config.END}")
        with open("cookies.txt", "w") as file:
            file.write(
                f"""# Netscape HTTP Cookie File
# http://curl.haxx.se/rfc/cookie_spec.html
# This is a generated file! Do not edit.
.furaffinity.net	TRUE	/	TRUE	{cookie_a.expires}	a	{cookie_a.value}
.furaffinity.net	TRUE	/	TRUE	{cookie_b.expires}	b	{cookie_b.value}"""
            )
        print(
            f'{config.SUCCESS_COLOR}cookies saved successfully, now you can provide them \
by using "-c cookies.txt"{config.END}'
        )
    except AttributeError:
        print(
            f"{config.ERROR_COLOR}Error getting cookies, either you need to login into \
furaffinity in your browser, or you can export cookies.txt manually{config.END}"
        )
        exit()


def next_button(page_url):
    response = session.get(page_url)
    s = BeautifulSoup(response.text, "html.parser")
    if config.submissions is True:
        # unlike galleries that are sequentially numbered, submissions use a different scheme.
        # the "page_num" is instead: new~[set of numbers]@(12 or 48 or 72) if sorting by new
        try:
            next_button = s.find("a", class_="button standard more").attrs.get("href")
        except AttributeError:
            try:
                next_button = s.find("a", class_="button standard more-half").attrs.get(
                    "href"
                )
            except AttributeError as e:
                print(f"{config.WARN_COLOR}Unable to find next button{config.END}")
                raise download_complete from e
        page_num = next_button.split("/")[-2]
    elif config.category != "favorites":
        next_button = s.find("button", class_="button standard", text="Next")
        if next_button is None or next_button.parent is None:
            print(f"{config.WARN_COLOR}Unable to find next button{config.END}")
            raise download_complete
        page_num = next_button.parent.attrs["action"].split("/")[-2]
    else:
        page_num = fav_next_button(s)
    print(f"Downloading page {page_num} - {page_url}")
    return page_num


def fav_next_button(s):
    # unlike galleries that are sequentially numbered, favorites use a different scheme.
    # the "page_num" is instead: [set of numbers]/next (the trailing /next is required)
    next_button = s.find("a", class_="button standard right", text="Next")
    if next_button is None:
        print(f"{config.WARN_COLOR}Unable to find next button{config.END}")
        raise download_complete
    next_page_link = next_button.attrs["href"]
    next_fav_num = re.search(r"\d+", next_page_link)
    if next_fav_num is None:
        print(f"{config.WARN_COLOR}Failed to parse next favorite link{config.END}")
        raise download_complete
    return f"{next_fav_num[0]}/next"

README.md

@@ -24,9 +24,8 @@ When downloading a folder make sure to put everything after **/folder/**, for ex
 ```help
-usage: furaffinity-dl.py [-h] [-sub] [-f FOLDER [FOLDER ...]] [-c COOKIES [COOKIES ...]] [-ua USER_AGENT [USER_AGENT ...]]
-                         [--start START [START ...]] [--stop STOP [STOP ...]] [--redownload] [--interval INTERVAL [INTERVAL ...]] [--rating]
-                         [--filter] [--metadata] [--download DOWNLOAD] [-jd] [--login] [--check] [--output]
+usage: furaffinity-dl.py [-h] [-c COOKIES] [--output OUTPUT_FOLDER] [--check] [-ua USER_AGENT] [-sub] [-f FOLDER] [-s START [START ...]]
+                         [-S STOP] [-rd] [-i INTERVAL] [-r] [--filter] [-m] [--download DOWNLOAD] [-jd] [--login]
                          [username] [category]
 
 Downloads the entire gallery/scraps/folder/favorites of a furaffinity user, or your submissions notifications
@@ -37,34 +36,34 @@ positional arguments:
 options:
   -h, --help            show this help message and exit
-  -sub, --submissions   download your submissions
-  -f FOLDER [FOLDER ...], --folder FOLDER [FOLDER ...]
-                        full path of the furaffinity gallery folder. for instance 123456/Folder-Name-Here
-  -c COOKIES [COOKIES ...], --cookies COOKIES [COOKIES ...]
+  -c COOKIES, --cookies COOKIES
                         path to a NetScape cookies file
-  -ua USER_AGENT [USER_AGENT ...], --user-agent USER_AGENT [USER_AGENT ...]
+  --output OUTPUT_FOLDER, -o OUTPUT_FOLDER
+                        set a custom output folder
+  --check               check and download latest submissions of a user
+  -ua USER_AGENT, --user-agent USER_AGENT
                         Your browser's useragent, may be required, depending on your luck
-  --start START [START ...], -s START [START ...]
+  -sub, --submissions   download your submissions
+  -f FOLDER, --folder FOLDER
+                        full path of the furaffinity gallery folder. for instance 123456/Folder-Name-Here
+  -s START [START ...], --start START [START ...]
                         page number to start from
-  --stop STOP [STOP ...], -S STOP [STOP ...]
-                        Page number to stop on. Specify the full URL after the username: for favorites pages (1234567890/next) or for submissions pages: (new~123456789@48)
-  --redownload, -rd     Redownload files that have been downloaded already
-  --interval INTERVAL [INTERVAL ...], -i INTERVAL [INTERVAL ...]
+  -S STOP, --stop STOP  Page number to stop on. Specify the full URL after the username: for favorites pages (1234567890/next) or for submissions pages: (new~123456789@48)
+  -rd, --redownload     Redownload files that have been downloaded already
+  -i INTERVAL, --interval INTERVAL
                         delay between downloading pages in seconds [default: 0]
-  --rating, -r          disable rating separation
+  -r, --rating          disable rating separation
   --filter              enable submission filter
-  --metadata, -m        enable metadata saving
+  -m, --metadata        enable metadata saving
   --download DOWNLOAD   download a specific submission /view/12345678/
   -jd, --json-description
                         download description as a JSON list
   --login               extract furaffinity cookies directly from your browser
-  --check               check and download latest submissions of a user
-  --output, -o          set a custom output folder
 
 Examples:
 python3 furaffinity-dl.py koul -> will download gallery of user koul
 python3 furaffinity-dl.py koul scraps -> will download scraps of user koul
 python3 furaffinity-dl.py mylafox favorites -> will download favorites of user mylafox
 
 You can also download several users in one go like this:
 python3 furaffinity-dl.py "koul radiquum mylafox" -> will download gallery of users koul -> radiquum -> mylafox
@@ -72,7 +71,7 @@ You can also provide a file with user names that are separated by a new line
 You can also log in to FurAffinity in a web browser and load cookies to download age restricted content or submissions:
 python3 furaffinity-dl.py letodoesart -c cookies.txt -> will download gallery of user letodoesart including Mature and Adult submissions
 python3 furaffinity-dl.py --submissions -c cookies.txt -> will download your submissions notifications
 
 DISCLAIMER: It is your own responsibility to check whether batch downloading is allowed by FurAffinity terms of service and to abide by them.

furaffinity-dl.py

@@ -1,437 +1,38 @@
 #!/usr/bin/python3
-import argparse
 import contextlib
 import http.cookiejar as cookielib
-import json
 import os
-import re
 from time import sleep
 
 import requests
 from bs4 import BeautifulSoup
-from pathvalidate import sanitize_filename
-from tqdm import tqdm
-
-# COLORS
-WHITE = "\033[1;37m"
-RED = "\033[1;91m"
-GREEN = "\033[1;92m"
-YELLOW = "\033[1;33m"
-END = "\033[0m"
-
-# Argument parsing
-parser = argparse.ArgumentParser(
-    formatter_class=argparse.RawTextHelpFormatter,
-    description="Downloads the entire gallery/scraps/folder/favorites \
-of a furaffinity user, or your submissions notifications",
-    epilog="""
-Examples:
-python3 furaffinity-dl.py koul -> will download gallery of user koul
-python3 furaffinity-dl.py koul scraps -> will download scraps of user koul
-python3 furaffinity-dl.py mylafox favorites -> will download favorites \
-of user mylafox \n
-You can also download a several users in one go like this:
-python3 furaffinity-dl.py "koul radiquum mylafox" \
--> will download gallery of users koul -> radiquum -> mylafox
-You can also provide a file with user names that are separated by a new line\n
-You can also log in to FurAffinity in a web browser and load cookies to \
-download age restricted content or submissions:
-python3 furaffinity-dl.py letodoesart -c cookies.txt -> will download \
-gallery of user letodoesart including Mature and Adult submissions
-python3 furaffinity-dl.py --submissions -c cookies.txt -> will download your \
-submissions notifications \n
-DISCLAIMER: It is your own responsibility to check whether batch downloading \
-is allowed by FurAffinity terms of service and to abide by them.
-""",
-)
-parser.add_argument(
-    "username",
-    nargs="?",
-    help="username of the furaffinity \
-user",
-)
-parser.add_argument(
-    "category",
-    nargs="?",
-    help="the category to download, gallery/scraps/favorites \
-[default: gallery]",
-    default="gallery",
-)
-parser.add_argument(
-    "-sub",
-    "--submissions",
-    action="store_true",
-    help="download your \
-submissions",
-)
-parser.add_argument(
-    "-f",
-    "--folder",
-    nargs="+",
-    help="full path of the furaffinity gallery folder. for instance 123456/\
-Folder-Name-Here",
-)
-parser.add_argument(
-    "-c", "--cookies", nargs="+", help="path to a NetScape cookies file"
-)
-parser.add_argument(
-    "-ua",
-    "--user-agent",
-    dest="user_agent",
-    nargs="+",
-    default=[
-        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 \
-Firefox/101.0"
-    ],
-    help="Your browser's useragent, may be required, depending on your luck",
-)
-parser.add_argument(
-    "--start", "-s", default=[1], help="page number to start from", nargs="+"
-)
-parser.add_argument(
-    "--stop",
-    "-S",
-    default=[0],
-    nargs="+",
-    help="Page number to stop on. Specify the full URL after the username: for \
-favorites pages (1234567890/next) or for submissions pages: \
-(new~123456789@48)",
-)
-parser.add_argument(
-    "--redownload",
-    "-rd",
-    dest="dont_redownload",
-    action="store_false",
-    help="Redownload files that have been downloaded already",
-)
-parser.add_argument(
-    "--interval",
-    "-i",
-    type=int,
-    default=[0],
-    nargs="+",
-    help="delay between downloading pages in seconds [default: 0]",
-)
-parser.add_argument(
-    "--rating",
-    "-r",
-    action="store_false",
-    help="disable rating separation",
-)
-parser.add_argument(
-    "--filter",
-    action="store_true",
-    help="enable submission filter",
-)
-parser.add_argument(
-    "--metadata",
-    "-m",
-    action="store_true",
-    help="enable metadata saving",
-)
-parser.add_argument(
-    "--download",
-    help="download a specific submission /view/12345678/",
-)
-parser.add_argument(
-    "-jd",
-    "--json-description",
-    dest="json_description",
-    action="store_true",
-    help="download description as a JSON list",
-)
-parser.add_argument(
-    "--login",
-    action="store_true",
-    help="extract furaffinity cookies directly from your browser",
-)
-parser.add_argument(
-    "--check",
-    action="store_true",
-    help="check and download latest submissions of a user",
-)
-parser.add_argument(
-    "--output",
-    "-o",
-    dest="output_folder",
-    default="Submissions",
-    help="set a custom output folder",
-)
-args = parser.parse_args()
-
-BASE_URL = "https://www.furaffinity.net"
-
-if args.username != None:
-    username = args.username.split(" ")
-category = args.category
+import Modules.config as config
+from Modules.functions import download_complete
+from Modules.functions import download
+from Modules.functions import login
+from Modules.functions import next_button
+from Modules.functions import system_message_handler
 
 # get session
 session = requests.session()
-session.headers.update({"User-Agent": args.user_agent[0]})
-
-if args.cookies is not None:  # add cookies if present
-    cookies = cookielib.MozillaCookieJar(args.cookies[0])
+session.headers.update({"User-Agent": config.user_agent})
+if config.cookies is not None:  # add cookies if present
+    cookies = cookielib.MozillaCookieJar(config.cookies)
     cookies.load()
     session.cookies = cookies
 
-
-# Functions
-def download_file(url, fname, desc):
-    try:
-        r = session.get(url, stream=True)
-        if r.status_code != 200:
-            print(
-                f'{RED}Got a HTTP {r.status_code} while downloading \
-"{fname}". URL {url} ...skipping{END}'
-            )
-            return False
-        total = int(r.headers.get("Content-Length", 0))
-        with open(fname, "wb") as file, tqdm(
-            desc=desc.ljust(40),
-            total=total,
-            miniters=100,
-            unit="b",
-            unit_scale=True,
-            unit_divisor=1024,
-        ) as bar:
-            for data in r.iter_content(chunk_size=1024):
-                size = file.write(data)
-                bar.update(size)
-    except KeyboardInterrupt:
-        print(f"{GREEN}Finished downloading{END}")
-        os.remove(fname)
-        exit()
-    return True
-
-
-def system_message_handler(s):
-    try:
-        message = {
-            s.find(class_="notice-message")
-            .find("div")
-            .find(class_="link-override")
-            .text.strip()
-        }
-    except AttributeError:
-        message = (
-            s.find("section", class_="aligncenter notice-message")
-            .find("div", class_="section-body alignleft")
-            .find("div", class_="redirect-message")
-            .text.strip()
-        )
-    print(f"{YELLOW}System Message: {message}{END}")
-    raise System_Message
-
-
-def check_filter(title):
-    search = 'YCH[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
-|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
-|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE\
-|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
-|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*ABLE\
-|AVAIL[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
-|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLONE\
-|CLONE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
-|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*LIM\
-|LIM[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
-|COM[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
-|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*COM\
-|COM[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE[^r]\
-|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*COM\
-|FIX[a-z $-/:-?{-~!"^_`\\[\\]]*ICE\
-|TELEGRAM[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
-|TG[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
-|REM[insder]*\\b\
-|\\bREF|\\bSale|auction|multislot|stream|adopt'
-    match = re.search(
-        search,
-        title,
-        re.IGNORECASE,
-    )
-    if match is not None and title == match.string:
-        return True
-    return None
-
-
-def create_metadata(output, data, s, title, filename):
-    os.makedirs(f'{output}/metadata', exist_ok=True)
-    metadata = f"{output}/metadata/{title} - {filename}"
-    if args.rating is True:
-        os.makedirs(f'{output}/{data.get("rating")}/metadata', exist_ok=True)
-        metadata = f'{output}/{data.get("rating")}/metadata/{title} - {filename}'
-    # Extract description as list
-    if args.json_description is True:
-        for desc in s.find("div", class_="submission-description").stripped_strings:
-            data["description"].append(desc)
-    # Extact tags
-    try:
-        for tag in s.find(class_="tags-row").findAll(class_="tags"):
-            data["tags"].append(tag.find("a").text)
-    except AttributeError:
-        print(f'{YELLOW}"{title}" has no tags{END}')
-    # Extract comments
-    for comment in s.findAll(class_="comment_container"):
-        temp_ele = comment.find(class_="comment-parent")
-        parent_cid = None if temp_ele is None else int(temp_ele.attrs.get("href")[5:])
-        # Comment is deleted or hidden
-        if comment.find(class_="comment-link") is None:
-            continue
-        data["comments"].append(
-            {
-                "cid": int(comment.find(class_="comment-link").attrs.get("href")[5:]),
-                "parent_cid": parent_cid,
-                "content": comment.find(class_="comment_text").contents[0].strip(),
-                "username": comment.find(class_="comment_username").text,
-                "date": comment.find(class_="popup_date").attrs.get("title"),
-            }
-        )
-    # Write a UTF-8 encoded JSON file for metadata
-    with open(f"{metadata}.json", "w", encoding="utf-8") as f:
-        json.dump(data, f, ensure_ascii=False, indent=4)
-
-
-def login():
-    import browser_cookie3
-
-    CJ = browser_cookie3.load()
-    response = session.get(BASE_URL, cookies=CJ)
-    FA_COOKIES = CJ._cookies[".furaffinity.net"]["/"]
-    cookie_a = FA_COOKIES["a"]
-    cookie_b = FA_COOKIES["b"]
-    s = BeautifulSoup(response.text, "html.parser")
-    try:
-        s.find(class_="loggedin_user_avatar")
-        account_username = s.find(class_="loggedin_user_avatar").attrs.get("alt")
-        print(f"{GREEN}Logged in as: {account_username}{END}")
-        with open("cookies.txt", "w") as file:
-            file.write(
-                f"""# Netscape HTTP Cookie File
-# http://curl.haxx.se/rfc/cookie_spec.html
-# This is a generated file! Do not edit.
-.furaffinity.net	TRUE	/	TRUE	{cookie_a.expires}	a	{cookie_a.value}
-.furaffinity.net	TRUE	/	TRUE	{cookie_b.expires}	b	{cookie_b.value}"""
-            )
-        print(
-            f'{GREEN}cookies saved successfully, now you can provide them \
-by using "-c cookies.txt"{END}'
-        )
-    except AttributeError:
-        print(
-            f"{RED}Error getting cookies, either you need to login into \
-furaffinity in your browser, or you can export cookies.txt manually{END}"
-        )
-        exit()
-
-
-# File downloading
-class Check_Complete(Exception):
-    pass
-
-
-class System_Message(Exception):
-    pass
-
-
-def download(path):
-    response = session.get(f"{BASE_URL}{path}")
-    s = BeautifulSoup(response.text, "html.parser")
-    # System messages
-    if s.find(class_="notice-message") is not None:
-        system_message_handler(s)
-    image = s.find(class_="download").find("a").attrs.get("href")
-    title = s.find(class_="submission-title").find("p").contents[0]
-    title = sanitize_filename(title)
-    dsc = s.find(class_="submission-description").text.strip().replace("\r\n", "\n")
-    if args.json_description is True:
-        dsc = []
-    filename = image.split("/")[-1:][0]
-    data = {
-        "id": int(path.split("/")[-2:-1][0]),
-        "filename": filename,
-        "author": s.find(class_="submission-id-sub-container")
-        .find("a")
-        .find("strong")
-        .text,
-        "date": s.find(class_="popup_date").attrs.get("title"),
-        "title": title,
-        "description": dsc,
-        "url": f"{BASE_URL}{path}",
-        "tags": [],
-        "category": s.find(class_="info").find(class_="category-name").text,
-        "type": s.find(class_="info").find(class_="type-name").text,
-        "species": s.find(class_="info").findAll("div")[2].find("span").text,
-        "gender": s.find(class_="info").findAll("div")[3].find("span").text,
-        "views": int(s.find(class_="views").find(class_="font-large").text),
-        "favorites": int(s.find(class_="favorites").find(class_="font-large").text),
-        "rating": s.find(class_="rating-box").text.strip(),
-        "comments": [],
-    }
-    if args.filter is True and check_filter(title) is True:
-        print(f'{YELLOW}"{title}" was filtered and will not be \
-downloaded - {data.get("url")}{END}')
-        return True
-    image_url = f"https:{image}"
-    output = f"{args.output_folder}/{data.get('author')}"
-    if category != "gallery":
-        output = f"{args.output_folder}/{data.get('author')}/{category}"
-    if args.folder is not None:
-        output = f"{args.output_folder}/{data.get('author')}/{folder[1]}"
-    os.makedirs(output, exist_ok=True)
-    filename = sanitize_filename(filename)
-    global output_path
-    output_path = f"{output}/{title} - {filename}"
-    if args.rating is True:
-        os.makedirs(f'{output}/{data.get("rating")}', exist_ok=True)
-        output_path = f'{output}/{data.get("rating")}/{title} - {filename}'
-    if args.dont_redownload is True and os.path.isfile(output_path):
-        if args.check is True:
-            print(f"{GREEN} Downloaded all recent files of \"{data.get('author')}\"")
-            raise Check_Complete
-        print(f'{YELLOW}Skipping "{title}" since it\'s already downloaded{END}')
-        return True
-    else:
-        download_file(
-            image_url,
-            output_path,
-            f'{title} - \
-[{data.get("rating")}]',
-        )
-    if args.metadata is True:
-        create_metadata(output, data, s, title, filename)
-    return True
-
-
-# Main function
 def main():
-    page_end = args.stop[0]
-    page_num = args.start[0]
     # download loop
-    with contextlib.suppress(Check_Complete, System_Message):
+    page_num = config.start
+    with contextlib.suppress(download_complete):
         while True:
-            if page_end == page_num:
-                print(f"{YELLOW}Reached page {page_end}, stopping.{END}")
+            if config.stop == page_num:
+                print(
+                    f'{config.WARN_COLOR}Reached page "{config.stop}", stopping.{config.END}'
+                )
                 break
 
             page_url = f"{download_url}/{page_num}"
@@ -444,99 +45,79 @@ def main():
             # End of gallery
             if s.find(id="no-images") is not None:
-                print(f"{GREEN}End of gallery{END}")
+                print(f"{config.SUCCESS_COLOR}End of gallery{config.END}")
                 break
 
             # Download all images on the page
             for img in s.findAll("figure"):
                 download(img.find("a").attrs.get("href"))
-                sleep(args.interval[0])
-
-            # Download submissions
-            if args.submissions is True:
-                try:
-                    next_button = s.find("a", class_="button standard more").attrs.get(
-                        "href"
-                    )
-                except AttributeError:
-                    try:
-                        next_button = s.find(
-                            "a", class_="button standard more-half"
-                        ).attrs.get("href")
-                    except AttributeError:
-                        print(f"{YELLOW}Unable to find next button{END}")
-                        break
-                # unlike galleries that are sequentially numbered, submissions use a different scheme.
-                # the "page_num" is instead: new~[set of numbers]@(12 or 48 or 72) if sorting by new
-                page_num = next_button.split("/")[-2]
-                page_url = f"{BASE_URL}{next_button}"
-            elif args.category != "favorites":
-                next_button = s.find("button", class_="button standard", text="Next")
-                if next_button is None or next_button.parent is None:
-                    print(f"{YELLOW}Unable to find next button{END}")
-                    break
-                page_num = next_button.parent.attrs["action"].split("/")[-2]
-            else:
-                next_button = s.find("a", class_="button standard right", text="Next")
-                if next_button is None:
-                    print(f"{YELLOW}Unable to find next button{END}")
-                    break
-                # unlike galleries that are sequentially numbered, favorites use a different scheme.
-                # the "page_num" is instead: [set of numbers]/next (the trailing /next is required)
-                next_page_link = next_button.attrs["href"]
-                next_fav_num = re.search(r"\d+", next_page_link)
-                if next_fav_num is None:
-                    print(f"{YELLOW}Failed to parse next favorite link{END}")
-                    break
-                page_num = f"{next_fav_num[0]}/next"
-            print(f"{WHITE}Downloading page {page_num} - {page_url} {END}")
-    print(
-        f"{GREEN}Finished \
-downloading{END}"
-    )
+                sleep(config.interval)
+
+            page_num = next_button(page_url)
 
 if __name__ == "__main__":
-    if args.login is True:
-        login()
-
-    response = session.get(BASE_URL)
+    if config.login is True:
+        login(config.user_agent)
+    try:
+        response = session.get(config.BASE_URL)
+    except ConnectionError:
+        print(f"{config.ERROR_COLOR}Connection failed{config.END}")
+        exit()
+    except KeyboardInterrupt:
+        print(f"{config.WARN_COLOR}Aborted by user{config.END}")
+        exit()
 
     s = BeautifulSoup(response.text, "html.parser")
     if s.find(class_="loggedin_user_avatar") is not None:
         account_username = s.find(class_="loggedin_user_avatar").attrs.get("alt")
-        print(f'{GREEN}Logged in as "{account_username}"{END}')
+        print(f'{config.SUCCESS_COLOR}Logged in as "{account_username}"{config.END}')
     else:
-        print(f"{YELLOW}Not logged in, NSFW content is inaccessible{END}")
+        print(
+            f"{config.WARN_COLOR}Not logged in, NSFW content is inaccessible{config.END}"
+        )
 
-    if args.download is not None:
-        download(args.download)
-        print(f'{GREEN}File saved as "{output_path}" {END}')
+    if config.download is not None:
+        download(config.download)
         exit()
 
-    if args.submissions is True:
-        download_url = f"{BASE_URL}/msg/submissions"
+    if config.submissions is True:
+        download_url = f"{config.BASE_URL}/msg/submissions"
         main()
+        print(
+            f"{config.SUCCESS_COLOR}Finished \
+downloading submissions{config.END}"
+        )
         exit()
 
-    if args.folder is not None:
-        folder = args.folder[0].split("/")
-        download_url = f"{BASE_URL}/gallery/{username[0]}/folder/{args.folder[0]}"
+    if config.folder is not None:
+        folder = config.folder.split("/")
+        download_url = (
+            f"{config.BASE_URL}/gallery/{config.username}/folder/{config.folder[1]}"
+        )
         main()
+        print(
+            f'{config.SUCCESS_COLOR}Finished \
+downloading "{config.folder[1]}"{config.END}'
+        )
         exit()
 
+    if config.category not in ["gallery", "scraps", "favorites"]:
+        print(
+            f"{config.ERROR_COLOR}Please enter a valid category [gallery/scraps/favorites] {config.END}"
+        )
+        exit()
+
-    if os.path.exists(username[0]):
-        data = open(username[0]).read()
-        username = filter(None, data.split("\n"))
+    if os.path.exists(config.username[0]):
+        data = open(config.username[0]).read()
+        config.username = filter(None, data.split("\n"))
 
-    for username in username:
-        print(f'{GREEN}Now downloading "{username}"{END}')
-        download_url = f"{BASE_URL}/{category}/{username}"
+    for username in config.username:
+        print(f'{config.SUCCESS_COLOR}Now downloading "{username}"{config.END}')
+        download_url = f"{config.BASE_URL}/{config.category}/{username}"
         main()
+        print(
+            f'{config.SUCCESS_COLOR}Finished \
+downloading "{username}"{config.END}'
+        )

requirements.txt

@@ -3,3 +3,4 @@ requests
 tqdm
 browser-cookie3
 pathvalidate
+pre-commit
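`pre-commit` is a development-hook tool, so a typical end-user setup stays the same as before; for example (commands assume a standard Python 3 environment, username taken from the README examples):

```
python3 -m pip install -r requirements.txt
python3 furaffinity-dl.py koul
```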