changelog:

- workaround for the NTFS filesystem when a username ends with a dot
- README changes
- other minor changes
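The workaround in question escapes dots in an author's name before it is used as a folder name, since Windows/NTFS silently strips a trailing dot when creating a directory. A minimal sketch of the idea (the helper name is hypothetical; the commit itself inlines the `.replace()` call in Modules/download.py):

```python
# Sketch of the NTFS workaround; "escape_author" is a hypothetical name,
# the commit inlines author.replace(".", "._") in Modules/download.py.
# On Windows/NTFS, creating a directory named "kentai." yields "kentai",
# so later lookups for the dotted name miss. Escaping every dot avoids that.
def escape_author(author: str) -> str:
    return author.replace(".", "._")

print(escape_author("kentai."))  # -> "kentai._"
```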
This commit is contained in:
Kentai Radiquum 2022-07-22 00:45:06 +05:00
parent 007f00b8ba
commit d610cd350e
GPG key ID: CB1FC16C710DB347
8 changed files with 141 additions and 162 deletions

.gitignore

@@ -11,7 +11,9 @@ cookies.txt
# Default download folder
Submissions/
# vscode stuff
.vscode
#Dev stuff
list.txt
.vscode
.idea
venv
__pycache__

Modules/config.py

@@ -1,4 +1,5 @@
import argparse
import os
parser = argparse.ArgumentParser(
formatter_class=argparse.RawTextHelpFormatter,
@@ -28,7 +29,8 @@ parser.add_argument(
"username",
nargs="?",
help="username of the furaffinity \
user",
user (if the username starts with '-' or '--', \
provide it through a file instead)",
)
parser.add_argument(
"category",
@@ -36,14 +38,16 @@ parser.add_argument(
help="the category to download, gallery/scraps/favorites \
[default: gallery]",
default="gallery",
type=str,
)
parser.add_argument("-c", "--cookies", help="path to a NetScape cookies file")
parser.add_argument("--cookies", "-c", help="path to a NetScape cookies file", type=str)
parser.add_argument(
"--output",
"-o",
dest="output_folder",
default="Submissions",
help="set a custom output folder",
type=str,
)
parser.add_argument(
"--check",
@@ -51,56 +55,53 @@ parser.add_argument(
help="check and download latest submissions of a user",
)
parser.add_argument(
"-ua",
"--user-agent",
"-ua",
dest="user_agent",
default="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 \
Firefox/101.0",
help="Your browser's user agent, may be required, depending on your luck",
type=str,
)
parser.add_argument(
"-sub",
"--submissions",
"-sub",
action="store_true",
help="download your \
submissions",
)
parser.add_argument(
"-f",
"--folder",
"-f",
help="full path of the furaffinity gallery folder. for instance 123456/\
Folder-Name-Here",
type=str,
)
parser.add_argument("--start", default=1, help="page number to start from", type=str)
parser.add_argument(
"-s",
"--start",
default=1,
help="page number to start from",
)
parser.add_argument(
"-S",
"--stop",
default=0,
help="Page number to stop on. Specify the full URL after the username: for \
favorites pages (1234567890/next) or for submissions pages: \
(new~123456789@48)",
type=str,
)
parser.add_argument(
"-rd",
"--redownload",
"-rd",
action="store_false",
help="Redownload files that have been downloaded already",
)
parser.add_argument(
"-i",
"--interval",
type=int,
"-i",
default=0,
help="delay between downloading pages in seconds [default: 0]",
type=int,
)
parser.add_argument(
"-r",
"--rating",
"-r",
action="store_false",
help="disable rating separation",
)
@@ -111,18 +112,17 @@ parser.add_argument(
help="enable submission filter",
)
parser.add_argument(
"-m",
"--metadata",
"-m",
action="store_true",
help="enable metadata saving",
)
parser.add_argument(
"--download",
help="download a specific submission by providing its id",
"--download", help="download a specific submission by providing its id", type=str
)
parser.add_argument(
"-jd",
"--json-description",
"-jd",
dest="json_description",
action="store_true",
help="download description as a JSON list",
@@ -147,6 +147,10 @@ category = args.category
if username is not None:
username = username.split(" ")
if os.path.exists(username[0]):
data = open(username[0]).read()
username = filter(None, data.split("\n"))
# Custom input
cookies = args.cookies
output_folder = args.output_folder
@@ -199,4 +203,4 @@ search = 'YCH[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
|TELEGRAM[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
|TG[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
|REM[insder]*\\b\
|\\bREF|\\bSale|auction|multislot|stream|adopt'
|\\bREF|\\bSale|auction|multislot|multi slot|stream|adopt'
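For reference, the widened `search` pattern above (it now also matches "multi slot") is applied to submission titles by `check_filter` in Modules/functions.py. A rough, abridged sketch of that check; the `re.IGNORECASE` flag is an assumption, since the real call's arguments are truncated in this diff:

```python
import re

# Abridged version of config.search; the full pattern has more alternatives.
# re.IGNORECASE is assumed here -- the uppercase alternatives (YCH, OPEN)
# suggest case-insensitive matching, but the real flags are not shown above.
search = r'YCH[a-z $-/:-?{-~!"^_`\[\]]*OPEN|\bREF|auction|multislot|multi slot|stream|adopt'

def check_filter(title: str) -> bool:
    return re.search(search, title, re.IGNORECASE) is not None

print(check_filter("Multi Slot YCH - OPEN"))  # True -> submission is filtered
```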

Modules/download.py

@@ -1,28 +1,18 @@
import http.cookiejar as cookielib
import json
import os
import requests
from bs4 import BeautifulSoup
from pathvalidate import sanitize_filename
from tqdm import tqdm
import Modules.config as config
from Modules.functions import download_complete
from Modules.functions import DownloadComplete
from Modules.functions import requests_retry_session
from Modules.functions import system_message_handler
session = requests.session()
if config.cookies is not None: # add cookies if present
cookies = cookielib.MozillaCookieJar(config.cookies)
cookies.load()
session.cookies = cookies
def download(path):
response = requests_retry_session(session=session).get(
f"{config.BASE_URL}{path}"
)
response = requests_retry_session().get(f"{config.BASE_URL}{path}")
s = BeautifulSoup(response.text, "html.parser")
# System messages
@@ -32,7 +22,7 @@ def download(path):
image = s.find(class_="download").find("a").attrs.get("href")
except AttributeError:
print(
f"{config.ERROR_COLOR}uncessesful download of {config.BASE_URL}{path}{config.END}"
f"{config.ERROR_COLOR}unsuccessful download of {config.BASE_URL}{path}{config.END}"
)
download(path)
return True
@@ -40,10 +30,14 @@ def download(path):
filename = sanitize_filename(image.split("/")[-1:][0])
author = (
s.find(class_="submission-id-sub-container").find("a").find("strong").text
s.find(class_="submission-id-sub-container")
.find("a")
.find("strong")
.text.replace(".", "._")
)
title = sanitize_filename(
s.find(class_="submission-title").find("p").contents[0]
str(s.find(class_="submission-title").find("p").contents[0])
)
view_id = int(path.split("/")[-2:-1][0])
@@ -70,18 +64,19 @@ def download(path):
image_url = f"https:{image}"
if download_file(image_url, output_path, f"{title} - [{rating}]") is True:
if (
download_file(
image_url, f"{config.BASE_URL}{path}", output_path, f"{title} - [{rating}]"
)
is True
):
with open(
f"{config.output_folder}/index.idx", encoding="utf-8", mode="a+"
) as idx:
idx.write(f"({view_id})\n")
if config.metadata is True:
dsc = (
s.find(class_="submission-description")
.text.strip()
.replace("\r\n", "\n")
)
dsc = s.find(class_="submission-description").text.strip().replace("\r\n", "\n")
if config.json_description is True:
dsc = []
data = {
@@ -98,9 +93,7 @@ def download(path):
"species": s.find(class_="info").findAll("div")[2].find("span").text,
"gender": s.find(class_="info").findAll("div")[3].find("span").text,
"views": int(s.find(class_="views").find(class_="font-large").text),
"favorites": int(
s.find(class_="favorites").find(class_="font-large").text
),
"favorites": int(s.find(class_="favorites").find(class_="font-large").text),
"rating": rating,
"comments": [],
}
@@ -114,17 +107,17 @@ def download(path):
return True
def download_file(url, fname, desc):
def download_file(url, view_url, file_name, desc):
try:
r = session.get(url, stream=True)
r = requests_retry_session().get(url, stream=True)
if r.status_code != 200:
print(
f'{config.ERROR_COLOR}Got a HTTP {r.status_code} while downloading \
"{fname}". URL {url} ...skipping{config.END}'
"{file_name}" ({view_url}) ...skipping{config.END}'
)
return False
total = int(r.headers.get("Content-Length", 0))
with open(fname, "wb") as file, tqdm(
with open(file_name, "wb") as file, tqdm(
desc=desc.ljust(40),
total=total,
miniters=100,
@@ -137,7 +130,7 @@ def download_file(url, fname, desc):
bar.update(size)
except KeyboardInterrupt:
print(f"{config.SUCCESS_COLOR}Finished downloading{config.END}")
os.remove(fname)
os.remove(file_name)
exit()
return True
@@ -155,7 +148,7 @@ def create_metadata(output, data, s, title, filename):
for desc in s.find("div", class_="submission-description").stripped_strings:
data["description"].append(desc)
# Extact tags
# Extract tags
try:
for tag in s.find(class_="tags-row").findAll(class_="tags"):
@@ -194,7 +187,7 @@ def file_exists_fallback(author, title, view_id):
f'fallback: {config.SUCCESS_COLOR}Downloaded all recent files of \
"{author}"{config.END}'
)
raise download_complete
raise DownloadComplete
print(
f'fallback: {config.WARN_COLOR}Skipping "{title}" since \
it\'s already downloaded{config.END}'

Modules/functions.py

@@ -9,14 +9,6 @@ from urllib3.util import Retry
import Modules.config as config
session = requests.session()
if config.cookies is not None: # add cookies if present
cookies = cookielib.MozillaCookieJar(config.cookies)
cookies.load()
session.cookies = cookies
session.headers.update({"User-Agent": config.user_agent})
def requests_retry_session(
retries=3,
@@ -24,7 +16,13 @@ def requests_retry_session(
status_forcelist=(500, 502, 504, 104),
session=None,
):
"""Get a session, and retry in case of an error"""
session = session or requests.Session()
if config.cookies is not None: # add cookies if present
cookies = cookielib.MozillaCookieJar(config.cookies)
cookies.load()
session.cookies = cookies
session.headers.update({"User-Agent": config.user_agent})
retry = Retry(
total=retries,
read=retries,
@@ -38,11 +36,12 @@
return session
class download_complete(Exception):
class DownloadComplete(Exception):
pass
def check_filter(title):
"""Compare post title and search string, then return 'True' if match found"""
match = re.search(
config.search,
@@ -56,6 +55,7 @@ def check_filter(title):
def system_message_handler(s):
"""Parse and return system message text"""
try:
message = (
s.find(class_="notice-message")
@@ -78,18 +78,19 @@ def system_message_handler(s):
.text.strip()
)
print(f"{config.WARN_COLOR}System Message: {message}{config.END}")
raise download_complete
raise DownloadComplete
def login():
"""Get cookies from any browser with logged in furaffinity and save them to file"""
session = requests.Session()
cj = browser_cookie3.load()
CJ = browser_cookie3.load()
response = session.get(config.BASE_URL, cookies=cj)
fa_cookies = cj._cookies[".furaffinity.net"]["/"]
response = session.get(config.BASE_URL, cookies=CJ)
FA_COOKIES = CJ._cookies[".furaffinity.net"]["/"]
cookie_a = FA_COOKIES["a"]
cookie_b = FA_COOKIES["b"]
cookie_a = fa_cookies["a"]
cookie_b = fa_cookies["b"]
s = BeautifulSoup(response.text, "html.parser")
try:
@@ -116,48 +117,51 @@ furaffinity in your browser, or you can export cookies.txt manually{config.END}"
def next_button(page_url):
response = session.get(page_url)
"""Parse Next button and get next page url"""
response = requests_retry_session().get(page_url)
s = BeautifulSoup(response.text, "html.parser")
if config.submissions is True:
# unlike galleries that are sequentially numbered, submissions use a different scheme.
# the "page_num" is instead: new~[set of numbers]@(12 or 48 or 72) if sorting by new
try:
next_button = s.find("a", class_="button standard more").attrs.get("href")
except AttributeError:
try:
next_button = s.find("a", class_="button standard more-half").attrs.get(
parse_next_button = s.find("a", class_="button standard more").attrs.get(
"href"
)
except AttributeError:
try:
parse_next_button = s.find(
"a", class_="button standard more-half"
).attrs.get("href")
except AttributeError as e:
print(f"{config.WARN_COLOR}Unable to find next button{config.END}")
raise download_complete from e
page_num = next_button.split("/")[-2]
raise DownloadComplete from e
page_num = parse_next_button.split("/")[-2]
elif config.category != "favorites":
next_button = s.find("button", class_="button standard", text="Next")
if next_button is None or next_button.parent is None:
parse_next_button = s.find("button", class_="button standard", text="Next")
if parse_next_button is None or parse_next_button.parent is None:
print(f"{config.WARN_COLOR}Unable to find next button{config.END}")
raise download_complete
page_num = next_button.parent.attrs["action"].split("/")[-2]
raise DownloadComplete
page_num = parse_next_button.parent.attrs["action"].split("/")[-2]
else:
next_button = s.find("a", class_="button standard right", text="Next")
page_num = fav_next_button(s)
parse_next_button = s.find("a", class_="button standard right", text="Next")
page_num = fav_next_button(parse_next_button)
print(
f"Downloading page {page_num} - {config.BASE_URL}{next_button.parent.attrs['action']}"
f"Downloading page {page_num} - {config.BASE_URL}{parse_next_button.parent.attrs['action']}"
)
return page_num
def fav_next_button():
def fav_next_button(parse_next_button):
# unlike galleries that are sequentially numbered, favorites use a different scheme.
# the "page_num" is instead: [set of numbers]/next (the trailing /next is required)
if next_button is None:
if parse_next_button is None:
print(f"{config.WARN_COLOR}Unable to find next button{config.END}")
raise download_complete
next_page_link = next_button.attrs["href"]
raise DownloadComplete
next_page_link = parse_next_button.attrs["href"]
next_fav_num = re.search(r"\d+", next_page_link)
if next_fav_num is None:
print(f"{config.WARN_COLOR}Failed to parse next favorite link{config.END}")
raise download_complete
raise DownloadComplete
return f"{next_fav_num[0]}/next"

Modules/index.py

@@ -9,8 +9,8 @@ import Modules.config as config
@lru_cache(maxsize=None)
def start_indexing(path, layer=0):
"""Recursively iterate over each item in path
and print item's name.
"""Recursively iterate over each item in path, then
save and print item's name.
"""
# make Path object from input string
@@ -23,7 +23,7 @@ def start_indexing(path, layer=0):
if p.is_file():
name = p.stem
ext = p.suffix
match = re.search(r"\([0-9]{5,}\)", name)
match = re.search(r"\(\d{5,}\)", name)
if match is None and ext not in [".txt", ".idx"]:
return
@@ -39,6 +39,7 @@ def start_indexing(path, layer=0):
@lru_cache(maxsize=None)
def check_file(path):
"""compare file view id with index list"""
view_id = path.split("/")[-2:-1][0]
with contextlib.suppress(FileNotFoundError):
with open(f"{config.output_folder}/index.idx", encoding="utf-8") as idx:

README.md

@@ -1,22 +1,20 @@
This branch is the development version of furaffinity-dl rewritten in python.
# FurAffinity Downloader
**furaffinity-dl** is a python script for batch downloading of galleries (and scraps/favourites) from furaffinity users users or your submissons!
It was written for preservation of culture, to counter the people nuking their galleries every once a while.
and then modified for confinience.
**furaffinity-dl** is a python script for batch downloading of galleries (and scraps/favorites) from furaffinity users or your submission notifications!
It was written mainly for the preservation of culture, to counter people nuking their galleries every once in a while.
But no one is stopping you from just using it for convenience.
Supports all known submission types: images, text, flash and audio.
## Requirements
`python 3`
`python3` (recommended version 3.10 or above)
`pip3 install -r requirements.txt`
**The script currently only works with the "Modern" theme**
furaffinity-dl has been tested on Linux and Windows OSs, however it should also work on Mac or any other platform that supports python.
furaffinity-dl has only been tested on Linux, however it should also work on Mac, Windows or any other platform that supports python.
***The script currently only works with the "Modern" theme***
## Usage
@@ -24,41 +22,42 @@ When downloading a folder make sure to put everything after **/folder/**, for ex
```help
usage: furaffinity-dl.py [-h] [-c COOKIES] [--output OUTPUT_FOLDER] [--check] [-ua USER_AGENT] [-sub] [-f FOLDER] [-s START [START ...]]
[-S STOP] [-rd] [-i INTERVAL] [-r] [--filter] [-m] [--download DOWNLOAD] [-jd] [--login]
usage: furaffinity-dl.py [-h] [--cookies COOKIES] [--output OUTPUT_FOLDER] [--check] [--user-agent USER_AGENT] [--submissions] [--folder FOLDER] [--start START]
[--stop STOP] [--redownload] [--interval INTERVAL] [--rating] [--filter] [--metadata] [--download DOWNLOAD] [--json-description] [--login]
[--index]
[username] [category]
Downloads the entire gallery/scraps/folder/favorites of a furaffinity user, or your submissions notifications
positional arguments:
username username of the furaffinity user
username username of the furaffinity user (if the username starts with '-' or '--', provide it through a file instead)
category the category to download, gallery/scraps/favorites [default: gallery]
options:
-h, --help show this help message and exit
-c COOKIES, --cookies COOKIES
--cookies COOKIES, -c COOKIES
path to a NetScape cookies file
--output OUTPUT_FOLDER, -o OUTPUT_FOLDER
set a custom output folder
--check check and download latest submissions of a user
-ua USER_AGENT, --user-agent USER_AGENT
--user-agent USER_AGENT, -ua USER_AGENT
Your browser's user agent, may be required, depending on your luck
-sub, --submissions download your submissions
-f FOLDER, --folder FOLDER
--submissions, -sub download your submissions
--folder FOLDER, -f FOLDER
full path of the furaffinity gallery folder. for instance 123456/Folder-Name-Here
-s START [START ...], --start START [START ...]
page number to start from
-S STOP, --stop STOP Page number to stop on. Specify the full URL after the username: for favorites pages (1234567890/next) or for submissions pages: (new~123456789@48)
-rd, --redownload Redownload files that have been downloaded already
-i INTERVAL, --interval INTERVAL
--start START page number to start from
--stop STOP Page number to stop on. Specify the full URL after the username: for favorites pages (1234567890/next) or for submissions pages: (new~123456789@48)
--redownload, -rd Redownload files that have been downloaded already
--interval INTERVAL, -i INTERVAL
delay between downloading pages in seconds [default: 0]
-r, --rating disable rating separation
--rating, -r disable rating separation
--filter enable submission filter
-m, --metadata enable metadata saving
--download DOWNLOAD download a specific submission /view/12345678/
-jd, --json-description
--metadata, -m enable metadata saving
--download DOWNLOAD download a specific submission by providing its id
--json-description, -jd
download description as a JSON list
--login extract furaffinity cookies directly from your browser
--index create an index of downloaded files in an output folder
Examples:
python3 furaffinity-dl.py koul -> will download gallery of user koul
@@ -77,15 +76,15 @@ DISCLAIMER: It is your own responsibility to check whether batch downloading is
```
You can also log in to download restricted content. To do that, log in to FurAffinity in your web browser, and use `python3 furaffinity-dl.py --login` to export furaffinity cookies from your web browser in Netscape format directly in file `cookies.txt` or export them manually with extensions: [for Firefox](https://addons.mozilla.org/en-US/firefox/addon/ganbo/) and [for Chrome based browsers](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid?hl=en), then you can then pass them to the script with the `-c` flag, like this (you may also have to provide your user agent):
You can also log in to download restricted content. To do that, log in to FurAffinity in your web browser, and use `python3 furaffinity-dl.py --login` to export furaffinity cookies from your web browser in Netscape format directly into the file `cookies.txt`, or export them manually with browser extensions: [for Firefox](https://addons.mozilla.org/en-US/firefox/addon/ganbo/) and [for Chrome based browsers](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid?hl=en). You can then pass them to the script with the `-c` flag, like this (you may also have to provide your user agent):
`python3 furaffinity-dl.py letodoesart -c cookies.txt --user_agent 'Mozilla/5.0 ....'`
`python3 furaffinity-dl.py letodoesart -c cookies.txt --user-agent 'Mozilla/5.0 ....'`
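If you export manually, the expected Netscape format is plain text with one tab-separated cookie per line, roughly like the sketch below (values and expiry are placeholders; `a` and `b` are the FurAffinity session cookies the `--login` helper extracts):

```
# Netscape HTTP Cookie File
.furaffinity.net	TRUE	/	TRUE	1893456000	a	xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
.furaffinity.net	TRUE	/	TRUE	1893456000	b	xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
```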
## TODO
<!-- ## TODO
- Download user profile information.
- "Classic" theme support
- Login without having to export cookies
- Login without having to export cookies -->
## Disclaimer

furaffinity-dl.py

@@ -1,16 +1,14 @@
#!/usr/bin/python3
import contextlib
import http.cookiejar as cookielib
import os
from time import sleep
import requests
from bs4 import BeautifulSoup
import Modules.config as config
from Modules.download import download
from Modules.functions import check_filter
from Modules.functions import download_complete
from Modules.functions import DownloadComplete
from Modules.functions import login
from Modules.functions import next_button
from Modules.functions import requests_retry_session
@@ -18,20 +16,11 @@ from Modules.functions import system_message_handler
from Modules.index import check_file
from Modules.index import start_indexing
# get session
session = requests.session()
session.headers.update({"User-Agent": config.user_agent})
if config.cookies is not None: # add cookies if present
cookies = cookielib.MozillaCookieJar(config.cookies)
cookies.load()
session.cookies = cookies
def main():
# download loop
"""loop over and download all images on the page(s)"""
page_num = config.start
with contextlib.suppress(download_complete):
with contextlib.suppress(DownloadComplete):
while True:
if config.stop == page_num:
print(
@@ -41,7 +30,7 @@ stopping.{config.END}'
break
page_url = f"{download_url}/{page_num}"
response = requests_retry_session(session=session).get(page_url)
response = requests_retry_session().get(page_url)
s = BeautifulSoup(response.text, "html.parser")
# System messages
@@ -71,7 +60,7 @@ downloaded - {config.BASE_URL}{img_url}{config.END}'
f'{config.SUCCESS_COLOR}Downloaded all recent files of \
"{username}"{config.END}'
)
raise download_complete
raise DownloadComplete
print(
f'{config.WARN_COLOR}Skipping "{title}" since \
it\'s already downloaded{config.END}'
@@ -96,15 +85,12 @@ if __name__ == "__main__":
print(f"{config.SUCCESS_COLOR}indexing finished{config.END}")
exit()
try:
response = requests_retry_session(session=session).get(config.BASE_URL)
except KeyboardInterrupt:
print(f"{config.WARN_COLOR}Aborted by user{config.END}")
exit()
s = BeautifulSoup(response.text, "html.parser")
if s.find(class_="loggedin_user_avatar") is not None:
account_username = s.find(class_="loggedin_user_avatar").attrs.get("alt")
one_time_response = requests_retry_session().get(config.BASE_URL)
one_time_s = BeautifulSoup(one_time_response.text, "html.parser")
if one_time_s.find(class_="loggedin_user_avatar") is not None:
account_username = one_time_s.find(class_="loggedin_user_avatar").attrs.get(
"alt"
)
print(
f'{config.SUCCESS_COLOR}Logged in as \
"{account_username}"{config.END}'
@@ -146,17 +132,6 @@ downloading "{config.folder[1]}"{config.END}'
)
exit()
try:
if os.path.exists(config.username[0]):
data = open(config.username[0]).read()
config.username = filter(None, data.split("\n"))
except TypeError or AttributeError:
print(
f"{config.ERROR_COLOR}Please enter a username \
or provide a file with usernames (1 username per line){config.END}"
)
exit()
for username in config.username:
username = username.split("#")[0].translate(
str.maketrans(config.username_replace_chars)

requirements.txt

@@ -1,6 +1,7 @@
beautifulsoup4
urllib3
requests
beautifulsoup4
tqdm
browser-cookie3
pathvalidate
pre-commit
browser-cookie3