Changelog:

- Workaround for the NTFS filesystem when a username ends with a dot (see the sketch below).
- ReadMe changes.
- Other minor changes.
Kentai Radiquum 2022-07-22 00:45:06 +05:00
parent 007f00b8ba
commit d610cd350e
8 changed files with 141 additions and 162 deletions
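The NTFS workaround (visible in the download-module diff below) escapes dots in the author name before it is used as a folder name: Windows/NTFS silently drops trailing dots, so a directory for "someuser." would collide with "someuser". A minimal sketch of the idea, with an illustrative name:

author = "someuser."
safe_author = author.replace(".", "._")  # "someuser._" keeps its dot on NTFS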

View file

@@ -1,4 +1,5 @@
import argparse
import os
parser = argparse.ArgumentParser(
formatter_class=argparse.RawTextHelpFormatter,
@@ -28,7 +29,8 @@ parser.add_argument(
"username",
nargs="?",
help="username of the furaffinity \
user",
user (if username is starting with '-' or '--' \
provide them through a file instead)",
)
parser.add_argument(
"category",
@@ -36,14 +38,16 @@ parser.add_argument(
help="the category to download, gallery/scraps/favorites \
[default: gallery]",
default="gallery",
type=str,
)
parser.add_argument("-c", "--cookies", help="path to a NetScape cookies file")
parser.add_argument("--cookies", "-c", help="path to a NetScape cookies file", type=str)
parser.add_argument(
"--output",
"-o",
dest="output_folder",
default="Submissions",
help="set a custom output folder",
type=str,
)
parser.add_argument(
"--check",
@@ -51,56 +55,53 @@ parser.add_argument(
help="check and download latest submissions of a user",
)
parser.add_argument(
"-ua",
"--user-agent",
"-ua",
dest="user_agent",
default="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 \
Firefox/101.0",
help="Your browser's useragent, may be required, depending on your luck",
help="Your browser's user agent, may be required, depending on your luck",
type=str,
)
parser.add_argument(
"-sub",
"--submissions",
"-sub",
action="store_true",
help="download your \
submissions",
)
parser.add_argument(
"-f",
"--folder",
"-f",
help="full path of the furaffinity gallery folder. for instance 123456/\
Folder-Name-Here",
type=str,
)
parser.add_argument("--start", default=1, help="page number to start from", type=str)
parser.add_argument(
"-s",
"--start",
default=1,
help="page number to start from",
)
parser.add_argument(
"-S",
"--stop",
default=0,
help="Page number to stop on. Specify the full URL after the username: for \
favorites pages (1234567890/next) or for submissions pages: \
(new~123456789@48)",
type=str,
)
parser.add_argument(
"-rd",
"--redownload",
"-rd",
action="store_false",
help="Redownload files that have been downloaded already",
)
parser.add_argument(
"-i",
"--interval",
type=int,
"-i",
default=0,
help="delay between downloading pages in seconds [default: 0]",
type=int,
)
parser.add_argument(
"-r",
"--rating",
"-r",
action="store_false",
help="disable rating separation",
)
@@ -111,18 +112,17 @@ parser.add_argument(
help="enable submission filter",
)
parser.add_argument(
"-m",
"--metadata",
"-m",
action="store_true",
help="enable metadata saving",
)
parser.add_argument(
"--download",
help="download a specific submission by providing its id",
"--download", help="download a specific submission by providing its id", type=str
)
parser.add_argument(
"-jd",
"--json-description",
"-jd",
dest="json_description",
action="store_true",
help="download description as a JSON list",
@@ -147,6 +147,10 @@ category = args.category
if username is not None:
username = username.split(" ")
if os.path.exists(username[0]):
data = open(username[0]).read()
username = filter(None, data.split("\n"))
# Custom input
cookies = args.cookies
output_folder = args.output_folder
@@ -199,4 +203,4 @@ search = 'YCH[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
|TELEGRAM[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
|TG[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
|REM[insder]*\\b\
|\\bREF|\\bSale|auction|multislot|stream|adopt'
|\\bREF|\\bSale|auction|multislot|multi slot|stream|adopt'

View file

@@ -1,28 +1,18 @@
import http.cookiejar as cookielib
import json
import os
import requests
from bs4 import BeautifulSoup
from pathvalidate import sanitize_filename
from tqdm import tqdm
import Modules.config as config
from Modules.functions import download_complete
from Modules.functions import DownloadComplete
from Modules.functions import requests_retry_session
from Modules.functions import system_message_handler
session = requests.session()
if config.cookies is not None: # add cookies if present
cookies = cookielib.MozillaCookieJar(config.cookies)
cookies.load()
session.cookies = cookies
def download(path):
response = requests_retry_session(session=session).get(
f"{config.BASE_URL}{path}"
)
response = requests_retry_session().get(f"{config.BASE_URL}{path}")
s = BeautifulSoup(response.text, "html.parser")
# System messages
@@ -32,7 +22,7 @@ def download(path):
image = s.find(class_="download").find("a").attrs.get("href")
except AttributeError:
print(
f"{config.ERROR_COLOR}uncessesful download of {config.BASE_URL}{path}{config.END}"
f"{config.ERROR_COLOR}unsuccessful download of {config.BASE_URL}{path}{config.END}"
)
download(path)
return True
@@ -40,10 +30,14 @@ def download(path):
filename = sanitize_filename(image.split("/")[-1:][0])
author = (
s.find(class_="submission-id-sub-container").find("a").find("strong").text
s.find(class_="submission-id-sub-container")
.find("a")
.find("strong")
.text.replace(".", "._")
)
title = sanitize_filename(
s.find(class_="submission-title").find("p").contents[0]
str(s.find(class_="submission-title").find("p").contents[0])
)
view_id = int(path.split("/")[-2:-1][0])
@@ -70,18 +64,19 @@ def download(path):
image_url = f"https:{image}"
if download_file(image_url, output_path, f"{title} - [{rating}]") is True:
if (
download_file(
image_url, f"{config.BASE_URL}{path}", output_path, f"{title} - [{rating}]"
)
is True
):
with open(
f"{config.output_folder}/index.idx", encoding="utf-8", mode="a+"
) as idx:
idx.write(f"({view_id})\n")
if config.metadata is True:
dsc = (
s.find(class_="submission-description")
.text.strip()
.replace("\r\n", "\n")
)
dsc = s.find(class_="submission-description").text.strip().replace("\r\n", "\n")
if config.json_description is True:
dsc = []
data = {
@@ -98,9 +93,7 @@ def download(path):
"species": s.find(class_="info").findAll("div")[2].find("span").text,
"gender": s.find(class_="info").findAll("div")[3].find("span").text,
"views": int(s.find(class_="views").find(class_="font-large").text),
"favorites": int(
s.find(class_="favorites").find(class_="font-large").text
),
"favorites": int(s.find(class_="favorites").find(class_="font-large").text),
"rating": rating,
"comments": [],
}
@@ -114,17 +107,17 @@ def download_file(url, fname, desc):
return True
def download_file(url, fname, desc):
def download_file(url, view_url, file_name, desc):
try:
r = session.get(url, stream=True)
r = requests_retry_session().get(url, stream=True)
if r.status_code != 200:
print(
f'{config.ERROR_COLOR}Got a HTTP {r.status_code} while downloading \
"{fname}". URL {url} ...skipping{config.END}'
"{file_name}" ({view_url}) ...skipping{config.END}'
)
return False
total = int(r.headers.get("Content-Length", 0))
with open(fname, "wb") as file, tqdm(
with open(file_name, "wb") as file, tqdm(
desc=desc.ljust(40),
total=total,
miniters=100,
@@ -137,7 +130,7 @@ def download_file(url, fname, desc):
bar.update(size)
except KeyboardInterrupt:
print(f"{config.SUCCESS_COLOR}Finished downloading{config.END}")
os.remove(fname)
os.remove(file_name)
exit()
return True
@@ -155,7 +148,7 @@ def create_metadata(output, data, s, title, filename):
for desc in s.find("div", class_="submission-description").stripped_strings:
data["description"].append(desc)
# Extact tags
# Extract tags
try:
for tag in s.find(class_="tags-row").findAll(class_="tags"):
@@ -194,7 +187,7 @@ def file_exists_fallback(author, title, view_id):
f'fallback: {config.SUCCESS_COLOR}Downloaded all recent files of \
"{author}"{config.END}'
)
raise download_complete
raise DownloadComplete
print(
f'fallback: {config.WARN_COLOR}Skipping "{title}" since \
it\'s already downloaded{config.END}'
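The download_file refactor above threads the submission page URL (view_url) into the error message, so a skipped download can be traced back to its post rather than only to the raw file URL. The streaming itself is the usual requests-plus-tqdm pattern; a self-contained sketch under generic names:

import requests
from tqdm import tqdm

def fetch(url, file_name):
    # Stream the body in chunks so large files never sit fully in memory;
    # tqdm draws a byte progress bar from the Content-Length header.
    response = requests.get(url, stream=True)
    total = int(response.headers.get("Content-Length", 0))
    with open(file_name, "wb") as file, tqdm(total=total, unit="B", unit_scale=True) as bar:
        for chunk in response.iter_content(chunk_size=8192):
            bar.update(file.write(chunk))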

View file

@@ -9,14 +9,6 @@ from urllib3.util import Retry
import Modules.config as config
session = requests.session()
if config.cookies is not None: # add cookies if present
cookies = cookielib.MozillaCookieJar(config.cookies)
cookies.load()
session.cookies = cookies
session.headers.update({"User-Agent": config.user_agent})
def requests_retry_session(
retries=3,
@@ -24,7 +16,13 @@ def requests_retry_session(
status_forcelist=(500, 502, 504, 104),
session=None,
):
"""Get a session, and retry in case of an error"""
session = session or requests.Session()
if config.cookies is not None: # add cookies if present
cookies = cookielib.MozillaCookieJar(config.cookies)
cookies.load()
session.cookies = cookies
session.headers.update({"User-Agent": config.user_agent})
retry = Retry(
total=retries,
read=retries,
@@ -38,11 +36,12 @@ def requests_retry_session(
return session
class download_complete(Exception):
class DownloadComplete(Exception):
pass
def check_filter(title):
"""Compare post title and search string, then return 'True' if match found"""
match = re.search(
config.search,
@@ -56,6 +55,7 @@ def check_filter(title):
def system_message_handler(s):
"""Parse and return system message text"""
try:
message = (
s.find(class_="notice-message")
@@ -78,18 +78,19 @@ def system_message_handler(s):
.text.strip()
)
print(f"{config.WARN_COLOR}System Message: {message}{config.END}")
raise download_complete
raise DownloadComplete
def login():
"""Get cookies from any browser with logged in furaffinity and save them to file"""
session = requests.Session()
cj = browser_cookie3.load()
CJ = browser_cookie3.load()
response = session.get(config.BASE_URL, cookies=cj)
fa_cookies = cj._cookies[".furaffinity.net"]["/"]
response = session.get(config.BASE_URL, cookies=CJ)
FA_COOKIES = CJ._cookies[".furaffinity.net"]["/"]
cookie_a = FA_COOKIES["a"]
cookie_b = FA_COOKIES["b"]
cookie_a = fa_cookies["a"]
cookie_b = fa_cookies["b"]
s = BeautifulSoup(response.text, "html.parser")
try:
@@ -116,48 +117,51 @@ furaffinity in your browser, or you can export cookies.txt manually{config.END}"
def next_button(page_url):
response = session.get(page_url)
"""Parse Next button and get next page url"""
response = requests_retry_session().get(page_url)
s = BeautifulSoup(response.text, "html.parser")
if config.submissions is True:
# unlike galleries that are sequentially numbered, submissions use a different scheme.
# the "page_num" is instead: new~[set of numbers]@(12 or 48 or 72) if sorting by new
try:
next_button = s.find("a", class_="button standard more").attrs.get("href")
parse_next_button = s.find("a", class_="button standard more").attrs.get(
"href"
)
except AttributeError:
try:
next_button = s.find("a", class_="button standard more-half").attrs.get(
"href"
)
parse_next_button = s.find(
"a", class_="button standard more-half"
).attrs.get("href")
except AttributeError as e:
print(f"{config.WARN_COLOR}Unable to find next button{config.END}")
raise download_complete from e
page_num = next_button.split("/")[-2]
raise DownloadComplete from e
page_num = parse_next_button.split("/")[-2]
elif config.category != "favorites":
next_button = s.find("button", class_="button standard", text="Next")
if next_button is None or next_button.parent is None:
parse_next_button = s.find("button", class_="button standard", text="Next")
if parse_next_button is None or parse_next_button.parent is None:
print(f"{config.WARN_COLOR}Unable to find next button{config.END}")
raise download_complete
page_num = next_button.parent.attrs["action"].split("/")[-2]
raise DownloadComplete
page_num = parse_next_button.parent.attrs["action"].split("/")[-2]
else:
next_button = s.find("a", class_="button standard right", text="Next")
page_num = fav_next_button(s)
parse_next_button = s.find("a", class_="button standard right", text="Next")
page_num = fav_next_button(parse_next_button)
print(
f"Downloading page {page_num} - {config.BASE_URL}{next_button.parent.attrs['action']}"
f"Downloading page {page_num} - {config.BASE_URL}{parse_next_button.parent.attrs['action']}"
)
return page_num
def fav_next_button():
def fav_next_button(parse_next_button):
# unlike galleries that are sequentially numbered, favorites use a different scheme.
# the "page_num" is instead: [set of numbers]/next (the trailing /next is required)
if next_button is None:
if parse_next_button is None:
print(f"{config.WARN_COLOR}Unable to find next button{config.END}")
raise download_complete
next_page_link = next_button.attrs["href"]
raise DownloadComplete
next_page_link = parse_next_button.attrs["href"]
next_fav_num = re.search(r"\d+", next_page_link)
if next_fav_num is None:
print(f"{config.WARN_COLOR}Failed to parse next favorite link{config.END}")
raise download_complete
raise DownloadComplete
return f"{next_fav_num[0]}/next"

View file

@@ -9,8 +9,8 @@ import Modules.config as config
@lru_cache(maxsize=None)
def start_indexing(path, layer=0):
"""Recursively iterate over each item in path
and print item's name.
"""Recursively iterate over each item in path, then
save and print item's name.
"""
# make Path object from input string
@@ -23,7 +23,7 @@ def start_indexing(path, layer=0):
if p.is_file():
name = p.stem
ext = p.suffix
match = re.search(r"\([0-9]{5,}\)", name)
match = re.search(r"\(\d{5,}\)", name)
if match is None and ext not in [".txt", ".idx"]:
return
@@ -39,6 +39,7 @@ def start_indexing(path, layer=0):
@lru_cache(maxsize=None)
def check_file(path):
"""compare file view id with index list"""
view_id = path.split("/")[-2:-1][0]
with contextlib.suppress(FileNotFoundError):
with open(f"{config.output_folder}/index.idx", encoding="utf-8") as idx: