mirror of
https://github.com/Radiquum/furaffinity-dl.git
synced 2025-04-05 15:54:38 +00:00
1. Fix multiple crashes when downloading files
2. Add an option to detect a submission's real category (--real-category), introducing a new directory structure
3. Check the file size when a download completes, to prevent corrupted files
4. Add an option to enable request compression (--request-compress)
5. Add an option to force a file-size check to detect corrupted files (--check-file-size)
6. Fix multiple edge cases that caused exceptions and corrupted files
7. Fix a stack overflow in some cases
8. Add a maximum retry limit when downloading
This commit is contained in:
parent
df508cabbe
commit
976ed6f12c
5 changed files with 117 additions and 20 deletions
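The central fix (items 1, 7, and 8) replaces the old unbounded recursive retry with a counted retry budget. A minimal standalone sketch of the pattern, with a hypothetical do_fetch helper standing in for the real request:

    def fetch_with_retries(path, max_retries=5):
        # A negative budget means every attempt failed: stop instead of
        # recursing forever, which is what previously overflowed the stack.
        if max_retries < 0:
            return False
        try:
            return do_fetch(path)  # hypothetical helper that raises on failure
        except Exception:
            # burn one retry and propagate the final outcome to the caller
            return fetch_with_retries(path, max_retries - 1)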
@@ -144,6 +144,25 @@ parser.add_argument(
     action="store_true",
     help="create an index of downloaded files in an output folder",
 )
+parser.add_argument(
+    "--real-category",
+    dest="real_category",
+    action="store_true",
+    help="download to a subfolder named after the submission's real category; useful when downloading favorites, to avoid duplicate files",
+)
+parser.add_argument(
+    "--request-compress",
+    dest="request_compress",
+    action="store_true",
+    help="enable request compression, which may save some bandwidth but leaves fewer files verifiable by Content-Length. " +
+    "Since images are not compressed by default anyway, keeping this disabled has little downside",
+)
+parser.add_argument(
+    "--check-file-size",
+    dest="check_file_size",
+    action="store_true",
+    help="check every file's size when downloading; this bypasses the built-in archive",
+)
 
 args = parser.parse_args()
 
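Combining the new flags, an illustrative invocation in the style of the README's examples (the next hunk shows why --check-file-size also forces compression and the index off):

 python3 furaffinity-dl.py koul --real-category --check-file-size -> will download into per-category subfolders, verifying each file's size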
@@ -180,6 +199,14 @@ metadata = args.metadata
 dont_redownload = args.redownload
 rating = args.rating
 submission_filter = args.submission_filter
+real_category = args.real_category
+request_compress = args.request_compress
+check_file_size = args.check_file_size
+
+if check_file_size:
+    request_compress = False
+    index = False
+
 
 # Colors
 SUCCESS_COLOR = "\033[1;92m"
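A note on the override above: size verification only makes sense when Content-Length counts the same bytes that land on disk, so compression is forced off; and since the index would let files be skipped before they could be re-measured, it is forced off as well. The same logic mirrored with plain booleans (standalone sketch, not the project's code):

    check_file_size, request_compress, index = True, True, True
    if check_file_size:
        request_compress = False  # keep Content-Length equal to on-disk bytes
        index = False             # the index would skip files unmeasured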
@@ -11,21 +11,26 @@ from Modules.functions import requests_retry_session
 from Modules.functions import system_message_handler
 
 
-def download(path):
-    response = requests_retry_session().get(f"{config.BASE_URL}{path}")
-    s = BeautifulSoup(response.text, "html.parser")
-
-    # System messages
-    if s.find(class_="notice-message") is not None:
-        system_message_handler(s)
+def download(path, max_retries=5):
+    if max_retries < 0:
+        return False
+    try:
+        response = requests_retry_session().get(f"{config.BASE_URL}{path}")
+
+        s = BeautifulSoup(response.text, "html.parser")
+
+        # System messages
+        if s.find(class_="notice-message") is not None:
+            system_message_handler(s)
         image = s.find(class_="download").find("a").attrs.get("href")
     except AttributeError:
         print(
-            f"{config.ERROR_COLOR}unsuccessful download of {config.BASE_URL}{path}{config.END}"
+            f"{config.ERROR_COLOR}unsuccessful download of {config.BASE_URL}{path}, {max_retries} retries remaining{config.END}"
         )
-        download(path)
-        return True
+        return download(path, max_retries - 1)
+    except Exception as e:
+        print(f"{config.ERROR_COLOR}exception while downloading {config.BASE_URL}{path}, {max_retries} retries remaining, error {e}{config.END}")
+        return download(path, max_retries - 1)
 
     filename = sanitize_filename(image.split("/")[-1:][0])
 
@@ -44,10 +49,14 @@ def download(path):
     output = f"{config.output_folder}/{author}"
     rating = s.find(class_="rating-box").text.strip()
 
-    if config.category != "gallery":
-        output = f"{config.output_folder}/{author}/{config.category}"
-    if config.folder is not None:
-        output = f"{config.output_folder}/{author}/{config.folder}"
+    if config.real_category:
+        real_category = get_image_cateory(s)
+        output = f"{config.output_folder}/{author}/{real_category}"
+    else:
+        if config.category != "gallery":
+            output = f"{config.output_folder}/{author}/{config.category}"
+        if config.folder is not None:
+            output = f"{config.output_folder}/{author}/{config.folder}"
     os.makedirs(output, exist_ok=True)
 
     output_path = f"{output}/{title} ({view_id}) - {filename}"
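The effect of --real-category on the directory layout, sketched with hypothetical values: a favorited scrap now lands under the detected category rather than the requested one:

    # hypothetical values, mirroring the path construction above
    output_folder, author = "downloads", "koul"
    real_category = "scraps"  # as detected from the submission page
    output = f"{output_folder}/{author}/{real_category}"
    assert output == "downloads/koul/scraps"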
@@ -57,13 +66,21 @@ def download(path):
     output_path = f"{output}/{rating}/{title} ({view_id}) - {filename}"
     output_path_fb = f"{output}/{rating}/{title} - {filename}"
 
+    image_url = f"https:{image}"
+
+    if config.check_file_size and (
+        os.path.isfile(output_path_fb) or os.path.isfile(output_path)
+    ):
+        content_length = get_content_length(image_url)
+        delete_file_if_mismatch_size(output_path_fb, content_length)
+        delete_file_if_mismatch_size(output_path, content_length)
+
+
     if config.dont_redownload is True and (
         os.path.isfile(output_path_fb) or os.path.isfile(output_path)
     ):
         return file_exists_fallback(author, title, view_id)
 
-    image_url = f"https:{image}"
-
     if (
         download_file(
             image_url, f"{config.BASE_URL}{path}", output_path, f"{title} - [{rating}]"
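The ordering here matters: the size check runs before the dont_redownload early-return, so a truncated file is deleted first and no longer exists when the skip check runs, which forces a clean re-download. A self-contained sketch of that flow (should_skip and expected_size are illustrative names, not the project's API):

    import os

    def should_skip(path, expected_size, check_file_size, dont_redownload):
        # 1) verify the existing file against the server-reported size
        if check_file_size and os.path.isfile(path):
            if expected_size > 0 and os.path.getsize(path) != expected_size:
                os.remove(path)  # corrupted/truncated: force a re-download
        # 2) only then decide whether an (intact) existing file can be skipped
        return dont_redownload and os.path.isfile(path)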
@@ -74,6 +91,8 @@ def download(path):
             f"{config.output_folder}/index.idx", encoding="utf-8", mode="a+"
         ) as idx:
             idx.write(f"({view_id})\n")
+    else:
+        return download(path, max_retries - 1)
 
     if config.metadata is True:
         if config.html_description is True:
@@ -120,6 +139,7 @@ def download_file(url, view_url, file_name, desc):
             )
             return False
         total = int(r.headers.get("Content-Length", 0))
+        encoding = r.headers.get('Content-Encoding', '')
         with open(file_name, "wb") as file, tqdm(
             desc=desc.ljust(40),
             total=total,
@@ -135,8 +155,42 @@ def download_file(url, view_url, file_name, desc):
         print(f"{config.SUCCESS_COLOR}Finished downloading{config.END}")
         os.remove(file_name)
         exit()
     except Exception as e:
         os.remove(file_name)
+        print(f"{config.ERROR_COLOR}Download {file_name} ({view_url}) failed, error {e}. Removing file...{config.END}")
+        return False
+
+    # if the webserver doesn't compress the file, we can verify its size
+    if len(encoding) == 0 and delete_file_if_mismatch_size(file_name, total):
+        return False
     return True
+
+def get_content_length(url):
+    try:
+        with requests_retry_session().get(url, stream=True) as r:
+            if r.status_code != 200:
+                print(
+                    f'{config.ERROR_COLOR}Got a HTTP {r.status_code} while getting content length of \
+                    "{url}" ...returning 0{config.END}'
+                )
+                return 0
+            content_length = r.headers.get("Content-Length", 0)
+            return int(content_length)
+    except Exception:
+        pass
+    return 0
+
+def delete_file_if_mismatch_size(path, target_size):
+    if type(target_size) != int:
+        target_size = int(target_size)
+    if target_size <= 0 or not os.path.isfile(path):
+        return False
+    file_size = os.path.getsize(path)
+    if file_size != target_size:
+        print(f"{config.ERROR_COLOR}File size {file_size}b mismatches {target_size}b: deleting file {path}{config.END}")
+        os.remove(path)
+        return True
+    return False
+
 
 def create_metadata(output, data, s, title, filename):
     if config.rating is True:
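Both helpers fail open by design: get_content_length returns 0 on any error, and delete_file_if_mismatch_size treats a non-positive target as "length unknown" and keeps the file. A quick standalone illustration of that contract:

    def mismatch(file_size, target_size):
        # target_size <= 0 means the length is unknown: never delete then
        return target_size > 0 and file_size != target_size

    assert mismatch(100, 0) is False    # unknown length: keep the file
    assert mismatch(100, 100) is False  # sizes agree: keep the file
    assert mismatch(50, 100) is True    # mismatch: treat as corrupted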
@@ -183,8 +237,10 @@ def create_metadata(output, data, s, title, filename):
 
 
 def file_exists_fallback(author, title, view_id):
-    with open(f"{config.output_folder}/index.idx", encoding="utf-8", mode="a+") as idx:
-        idx.write(f"({view_id})\n")
+    # do not write to the index when the file-size check is enabled
+    if not config.check_file_size:
+        with open(f"{config.output_folder}/index.idx", encoding="utf-8", mode="a+") as idx:
+            idx.write(f"({view_id})\n")
     if config.check is True:
         print(
             f'fallback: {config.SUCCESS_COLOR}Downloaded all recent files of \
@@ -196,3 +252,10 @@ def file_exists_fallback(author, title, view_id):
             it\'s already downloaded{config.END}'
         )
     return True
+
+def get_image_cateory(s):
+    if s.find(class_='button standard mobile-fix', string='Main Gallery') is not None:
+        return 'gallery'
+    elif s.find(class_='button standard mobile-fix', string='Scraps') is not None:
+        return 'scraps'
+    return 'unknown'
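get_image_cateory infers the category from which navigation button the submission page renders. A toy demonstration of the same BeautifulSoup lookup against a synthetic snippet:

    from bs4 import BeautifulSoup

    html = '<a class="button standard mobile-fix">Scraps</a>'  # synthetic HTML
    s = BeautifulSoup(html, "html.parser")
    found = s.find(class_="button standard mobile-fix", string="Scraps")
    print("scraps" if found is not None else "unknown")  # -> scraps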
@@ -18,6 +18,8 @@ def requests_retry_session(
 ):
     """Get a session, and retry in case of an error"""
     session = session or requests.Session()
+    if not config.request_compress:
+        session.headers.update({'Accept-Encoding': 'identity'})
     if config.cookies is not None:  # add cookies if present
         cookies = cookielib.MozillaCookieJar(config.cookies)
         cookies.load()
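Requesting Accept-Encoding: identity asks the server not to compress the body, so the Content-Length header describes exactly the bytes written to disk. A minimal illustration with plain requests (placeholder URL):

    import requests

    url = "https://example.com/image.png"  # placeholder
    with requests.get(url, stream=True, headers={"Accept-Encoding": "identity"}) as r:
        expected = int(r.headers.get("Content-Length", 0))
        body = r.content
    # without a Content-Encoding, the two byte counts should agree
    assert expected == 0 or expected == len(body)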
@@ -40,6 +40,8 @@ def start_indexing(path, layer=0):
 @lru_cache(maxsize=None)
 def check_file(path):
     """compare file view id with index list"""
+    if config.check_file_size:
+        return False
     view_id = path.split("/")[-2:-1][0]
     with contextlib.suppress(FileNotFoundError):
         with open(f"{config.output_folder}/index.idx", encoding="utf-8") as idx:
@@ -54,12 +54,15 @@ options:
   --filter              enable submission filter
   --metadata, -m        enable metadata saving
   --download DOWNLOAD   download a specific submission by providing its id
-  --html-description, -hd
-                        download description as original html format, this won't work if json-description is enabled
   --json-description, -jd
                         download description as a JSON list
+  --html-description, -hd
+                        download description as original html format, this won't work if json-description is enabled
   --login               extract furaffinity cookies directly from your browser
   --index               create an index of downloaded files in an output folder
+  --real-category       download to a subfolder named after the submission's real category; useful when downloading favorites, to avoid duplicate files
+  --request-compress    enable request compression, which may save some bandwidth but leaves fewer files verifiable by Content-Length. Since images are not compressed by default anyway, keeping this disabled has little downside
+  --check-file-size     check every file's size when downloading; this bypasses the built-in archive
 
 Examples:
  python3 furaffinity-dl.py koul -> will download gallery of user koul