mirror of
https://github.com/Radiquum/furaffinity-dl.git
synced 2025-04-05 15:54:38 +00:00
Merge pull request #4 from ovear/python
Some stability and functional fix
This commit is contained in:
commit
b9d958c6c1
5 changed files with 121 additions and 21 deletions
|
@ -144,6 +144,25 @@ parser.add_argument(
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="create an index of downloaded files in an output folder",
|
help="create an index of downloaded files in an output folder",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--real-category",
|
||||||
|
dest="real_category",
|
||||||
|
action="store_true",
|
||||||
|
help="this will download to the sub folder of its real category. it's useful when download favorites to avoid duplicate files",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--request-compress",
|
||||||
|
dest="request_compress",
|
||||||
|
action="store_true",
|
||||||
|
help="enable request compress which may save some bandwidth, but less file can be check by content-length. " +
|
||||||
|
"Since images won't be compress by default, it won't take much side effect to disable it by default",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--check-file-size",
|
||||||
|
dest="check_file_size",
|
||||||
|
action="store_true",
|
||||||
|
help="check all files size when download, this will skip build-in archive",
|
||||||
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
@ -180,6 +199,14 @@ metadata = args.metadata
|
||||||
dont_redownload = args.redownload
|
dont_redownload = args.redownload
|
||||||
rating = args.rating
|
rating = args.rating
|
||||||
submission_filter = args.submission_filter
|
submission_filter = args.submission_filter
|
||||||
|
real_category = args.real_category
|
||||||
|
request_compress = args.request_compress
|
||||||
|
check_file_size = args.check_file_size
|
||||||
|
|
||||||
|
if check_file_size:
|
||||||
|
request_compress = False
|
||||||
|
index = False
|
||||||
|
|
||||||
|
|
||||||
# Colors
|
# Colors
|
||||||
SUCCESS_COLOR = "\033[1;92m"
|
SUCCESS_COLOR = "\033[1;92m"
|
||||||
|
|
|
@ -11,21 +11,26 @@ from Modules.functions import requests_retry_session
|
||||||
from Modules.functions import system_message_handler
|
from Modules.functions import system_message_handler
|
||||||
|
|
||||||
|
|
||||||
def download(path):
|
def download(path, max_retries=5):
|
||||||
response = requests_retry_session().get(f"{config.BASE_URL}{path}")
|
if max_retries < 0:
|
||||||
s = BeautifulSoup(response.text, "html.parser")
|
return False
|
||||||
|
|
||||||
# System messages
|
|
||||||
if s.find(class_="notice-message") is not None:
|
|
||||||
system_message_handler(s)
|
|
||||||
try:
|
try:
|
||||||
|
response = requests_retry_session().get(f"{config.BASE_URL}{path}")
|
||||||
|
|
||||||
|
s = BeautifulSoup(response.text, "html.parser")
|
||||||
|
|
||||||
|
# System messages
|
||||||
|
if s.find(class_="notice-message") is not None:
|
||||||
|
system_message_handler(s)
|
||||||
image = s.find(class_="download").find("a").attrs.get("href")
|
image = s.find(class_="download").find("a").attrs.get("href")
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
print(
|
print(
|
||||||
f"{config.ERROR_COLOR}unsuccessful download of {config.BASE_URL}{path}{config.END}"
|
f"{config.ERROR_COLOR}unsuccessful download of {config.BASE_URL}{path} remains retries {max_retries}{config.END}"
|
||||||
)
|
)
|
||||||
download(path)
|
return download(path, max_retries - 1)
|
||||||
return True
|
except Exception as e:
|
||||||
|
print(f"{config.ERROR_COLOR}exception when download {config.BASE_URL}{path} remains retries {max_retries}, error {e}{config.END}")
|
||||||
|
return download(path, max_retries - 1)
|
||||||
|
|
||||||
filename = sanitize_filename(image.split("/")[-1:][0])
|
filename = sanitize_filename(image.split("/")[-1:][0])
|
||||||
|
|
||||||
|
@ -44,10 +49,14 @@ def download(path):
|
||||||
output = f"{config.output_folder}/{author}"
|
output = f"{config.output_folder}/{author}"
|
||||||
rating = s.find(class_="rating-box").text.strip()
|
rating = s.find(class_="rating-box").text.strip()
|
||||||
|
|
||||||
if config.category != "gallery":
|
if config.real_category:
|
||||||
output = f"{config.output_folder}/{author}/{config.category}"
|
real_category = get_image_cateory(s)
|
||||||
if config.folder is not None:
|
output = f"{config.output_folder}/{author}/{real_category}"
|
||||||
output = f"{config.output_folder}/{author}/{config.folder}"
|
else:
|
||||||
|
if config.category != "gallery":
|
||||||
|
output = f"{config.output_folder}/{author}/{config.category}"
|
||||||
|
if config.folder is not None:
|
||||||
|
output = f"{config.output_folder}/{author}/{config.folder}"
|
||||||
os.makedirs(output, exist_ok=True)
|
os.makedirs(output, exist_ok=True)
|
||||||
|
|
||||||
output_path = f"{output}/{title} ({view_id}) - {filename}"
|
output_path = f"{output}/{title} ({view_id}) - {filename}"
|
||||||
|
@ -57,13 +66,21 @@ def download(path):
|
||||||
output_path = f"{output}/{rating}/{title} ({view_id}) - {filename}"
|
output_path = f"{output}/{rating}/{title} ({view_id}) - {filename}"
|
||||||
output_path_fb = f"{output}/{rating}/{title} - {filename}"
|
output_path_fb = f"{output}/{rating}/{title} - {filename}"
|
||||||
|
|
||||||
|
image_url = f"https:{image}"
|
||||||
|
|
||||||
|
if config.check_file_size and (
|
||||||
|
os.path.isfile(output_path_fb) or os.path.isfile(output_path)
|
||||||
|
):
|
||||||
|
content_length = get_content_length(image_url)
|
||||||
|
delete_file_if_mismatch_size(output_path_fb, content_length)
|
||||||
|
delete_file_if_mismatch_size(output_path, content_length)
|
||||||
|
|
||||||
|
|
||||||
if config.dont_redownload is True and (
|
if config.dont_redownload is True and (
|
||||||
os.path.isfile(output_path_fb) or os.path.isfile(output_path)
|
os.path.isfile(output_path_fb) or os.path.isfile(output_path)
|
||||||
):
|
):
|
||||||
return file_exists_fallback(author, title, view_id)
|
return file_exists_fallback(author, title, view_id)
|
||||||
|
|
||||||
image_url = f"https:{image}"
|
|
||||||
|
|
||||||
if (
|
if (
|
||||||
download_file(
|
download_file(
|
||||||
image_url, f"{config.BASE_URL}{path}", output_path, f"{title} - [{rating}]"
|
image_url, f"{config.BASE_URL}{path}", output_path, f"{title} - [{rating}]"
|
||||||
|
@ -74,6 +91,8 @@ def download(path):
|
||||||
f"{config.output_folder}/index.idx", encoding="utf-8", mode="a+"
|
f"{config.output_folder}/index.idx", encoding="utf-8", mode="a+"
|
||||||
) as idx:
|
) as idx:
|
||||||
idx.write(f"({view_id})\n")
|
idx.write(f"({view_id})\n")
|
||||||
|
else:
|
||||||
|
return download(path, max_retries - 1)
|
||||||
|
|
||||||
if config.metadata is True:
|
if config.metadata is True:
|
||||||
if config.html_description is True:
|
if config.html_description is True:
|
||||||
|
@ -120,6 +139,7 @@ def download_file(url, view_url, file_name, desc):
|
||||||
)
|
)
|
||||||
return False
|
return False
|
||||||
total = int(r.headers.get("Content-Length", 0))
|
total = int(r.headers.get("Content-Length", 0))
|
||||||
|
encoding = r.headers.get('Content-Encoding', '')
|
||||||
with open(file_name, "wb") as file, tqdm(
|
with open(file_name, "wb") as file, tqdm(
|
||||||
desc=desc.ljust(40),
|
desc=desc.ljust(40),
|
||||||
total=total,
|
total=total,
|
||||||
|
@ -135,8 +155,43 @@ def download_file(url, view_url, file_name, desc):
|
||||||
print(f"{config.SUCCESS_COLOR}Finished downloading{config.END}")
|
print(f"{config.SUCCESS_COLOR}Finished downloading{config.END}")
|
||||||
os.remove(file_name)
|
os.remove(file_name)
|
||||||
exit()
|
exit()
|
||||||
|
except Exception as e:
|
||||||
|
os.remove(file_name)
|
||||||
|
print(f"{config.ERROR_COLOR}Download {file_name} ({view_url}) failed, error {e}. Remove file...{config.END}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
# if webserver doesn't compress file, we should check file size
|
||||||
|
if len(encoding) == 0 and delete_file_if_mismatch_size(file_name, total):
|
||||||
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def get_content_length(url):
    """Return the size in bytes that the server reports for ``url``.

    The request is opened with ``stream=True`` and only the status code and
    the ``Content-Length`` header are inspected; the body is not read here.

    Returns 0 when the status is not 200, when the header is absent, or when
    the request or the header parsing raises. ``delete_file_if_mismatch_size``
    ignores non-positive sizes, so 0 effectively means "size unknown".
    """
    try:
        with requests_retry_session().get(url, stream=True) as r:
            if r.status_code != 200:
                # Adjacent literals instead of a backslash continuation, so
                # the source indentation does not end up inside the message.
                print(
                    f"{config.ERROR_COLOR}Got a HTTP {r.status_code} "
                    f'while get content length "{url}" ...return 0{config.END}'
                )
                return 0
            return int(r.headers.get("Content-Length", 0))
    except Exception as e:
        # Deliberately best effort: a failed size probe must not abort the
        # download run, but the reason is reported instead of being dropped.
        print(
            f"{config.ERROR_COLOR}Can not get content length for {url}, "
            f"error {e}...{config.END}"
        )
    return 0
|
||||||
|
|
||||||
|
def delete_file_if_mismatch_size(path, target_size):
    """Delete ``path`` when its size on disk differs from ``target_size``.

    Args:
        path: location of the file to check.
        target_size: expected size in bytes. Non-int values are converted
            with ``int()``; a value that cannot be converted is treated the
            same way as an unknown size.

    Returns:
        True if the file existed, had a different size and was removed.
        False if the expected size is unknown (``<= 0`` or unparsable), the
        path is not an existing file, or the sizes match.
    """
    if not isinstance(target_size, int):
        try:
            target_size = int(target_size)
        except (TypeError, ValueError):
            # e.g. None or a malformed header value: nothing to compare to.
            return False

    # A non-positive size means the server did not report a usable length.
    if target_size <= 0 or not os.path.isfile(path):
        return False

    file_size = os.path.getsize(path)
    if file_size == target_size:
        return False

    print(f"{config.ERROR_COLOR}File size {file_size}b mismatch {target_size}b: delete file {path}{config.END}")
    os.remove(path)
    return True
|
||||||
|
|
||||||
def create_metadata(output, data, s, title, filename):
|
def create_metadata(output, data, s, title, filename):
|
||||||
if config.rating is True:
|
if config.rating is True:
|
||||||
|
@ -183,8 +238,10 @@ def create_metadata(output, data, s, title, filename):
|
||||||
|
|
||||||
|
|
||||||
def file_exists_fallback(author, title, view_id):
|
def file_exists_fallback(author, title, view_id):
|
||||||
with open(f"{config.output_folder}/index.idx", encoding="utf-8", mode="a+") as idx:
|
# do not write to index when check file size is enabled
|
||||||
idx.write(f"({view_id})\n")
|
if not config.check_file_size:
|
||||||
|
with open(f"{config.output_folder}/index.idx", encoding="utf-8", mode="a+") as idx:
|
||||||
|
idx.write(f"({view_id})\n")
|
||||||
if config.check is True:
|
if config.check is True:
|
||||||
print(
|
print(
|
||||||
f'fallback: {config.SUCCESS_COLOR}Downloaded all recent files of \
|
f'fallback: {config.SUCCESS_COLOR}Downloaded all recent files of \
|
||||||
|
@ -196,3 +253,10 @@ def file_exists_fallback(author, title, view_id):
|
||||||
it\'s already downloaded{config.END}'
|
it\'s already downloaded{config.END}'
|
||||||
)
|
)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def get_image_cateory(s):
    """Return the category of the submission page ``s``.

    ``s`` is the parsed submission page (``download`` passes its
    ``BeautifulSoup`` object). The page is searched for an element whose
    class string is ``button standard mobile-fix`` and whose text is exactly
    ``Main Gallery`` or ``Scraps``.

    Returns:
        ``'gallery'`` or ``'scraps'`` for the first matching button, or
        ``'unknown'`` when neither is present.
    """
    button_class = 'button standard mobile-fix'
    # Checked in this order; the first label found on the page wins.
    known_buttons = (
        ('Main Gallery', 'gallery'),
        ('Scraps', 'scraps'),
    )
    for label, category in known_buttons:
        if s.find(class_=button_class, string=label) is not None:
            return category
    return 'unknown'
|
|
@ -18,6 +18,8 @@ def requests_retry_session(
|
||||||
):
|
):
|
||||||
"""Get a session, and retry in case of an error"""
|
"""Get a session, and retry in case of an error"""
|
||||||
session = session or requests.Session()
|
session = session or requests.Session()
|
||||||
|
if not config.request_compress:
|
||||||
|
session.headers.update({'Accept-Encoding': 'identity'})
|
||||||
if config.cookies is not None: # add cookies if present
|
if config.cookies is not None: # add cookies if present
|
||||||
cookies = cookielib.MozillaCookieJar(config.cookies)
|
cookies = cookielib.MozillaCookieJar(config.cookies)
|
||||||
cookies.load()
|
cookies.load()
|
||||||
|
@ -160,10 +162,10 @@ def fav_next_button(parse_next_button):
|
||||||
print(f"{config.WARN_COLOR}Unable to find next button{config.END}")
|
print(f"{config.WARN_COLOR}Unable to find next button{config.END}")
|
||||||
raise DownloadComplete
|
raise DownloadComplete
|
||||||
next_page_link = parse_next_button.attrs["href"]
|
next_page_link = parse_next_button.attrs["href"]
|
||||||
next_fav_num = re.search(r"\d+", next_page_link)
|
next_fav_num = re.findall(r"\d+", next_page_link)
|
||||||
|
|
||||||
if next_fav_num is None:
|
if len(next_fav_num) <= 0:
|
||||||
print(f"{config.WARN_COLOR}Failed to parse next favorite link{config.END}")
|
print(f"{config.WARN_COLOR}Failed to parse next favorite link{config.END}")
|
||||||
raise DownloadComplete
|
raise DownloadComplete
|
||||||
|
|
||||||
return f"{next_fav_num[0]}/next"
|
return f"{next_fav_num[-1]}/next"
|
||||||
|
|
|
@ -40,6 +40,8 @@ def start_indexing(path, layer=0):
|
||||||
@lru_cache(maxsize=None)
|
@lru_cache(maxsize=None)
|
||||||
def check_file(path):
|
def check_file(path):
|
||||||
"""compare file view id with index list"""
|
"""compare file view id with index list"""
|
||||||
|
if config.check_file_size:
|
||||||
|
return False
|
||||||
view_id = path.split("/")[-2:-1][0]
|
view_id = path.split("/")[-2:-1][0]
|
||||||
with contextlib.suppress(FileNotFoundError):
|
with contextlib.suppress(FileNotFoundError):
|
||||||
with open(f"{config.output_folder}/index.idx", encoding="utf-8") as idx:
|
with open(f"{config.output_folder}/index.idx", encoding="utf-8") as idx:
|
||||||
|
|
|
@ -58,8 +58,13 @@ options:
|
||||||
download description as original html format, this won't work if json-description is enabled
|
download description as original html format, this won't work if json-description is enabled
|
||||||
--json-description, -jd
|
--json-description, -jd
|
||||||
download description as a JSON list
|
download description as a JSON list
|
||||||
|
--html-description, -hd
|
||||||
|
download description as original html format, this won't work if json-description is enabled
|
||||||
--login extract furaffinity cookies directly from your browser
|
--login extract furaffinity cookies directly from your browser
|
||||||
--index create an index of downloaded files in an output folder
|
--index create an index of downloaded files in an output folder
|
||||||
|
--real-category download each submission into the sub folder of its real category; useful when downloading favorites, to avoid duplicate files
|
||||||
|
--request-compress enable compressed requests, which may save some bandwidth, but fewer files can then be checked by content-length. Since images are not compressed by default, leaving this option disabled by default has little downside
|
||||||
|
--check-file-size check the size of every file when downloading; this bypasses the built-in archive (index)
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
python3 furaffinity-dl.py koul -> will download gallery of user koul
|
python3 furaffinity-dl.py koul -> will download gallery of user koul
|
||||||
|
|
Loading…
Add table
Reference in a new issue