Mirror of https://github.com/Radiquum/furaffinity-dl.git, synced 2025-04-19 07:44:37 +00:00
changelog:
- ability to add comments in the username list with "#"
- automatic removal of "_" in usernames
- speed up filter checking
- add basic indexing -> speeds up checking of existing files when looking for newer files
- other small changes
commit 675f558d03 (parent 377df392e5)
5 changed files with 260 additions and 122 deletions
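The username-list changes in the changelog are easiest to see with a tiny sketch; the logic mirrors the cleanup this commit adds (username_replace_chars plus split("#") in the main script), but the sample entries below are invented, not from the repository:

# Minimal sketch of the new username-list cleanup (sample entries are made up).
# Everything after "#" is treated as a comment; spaces and "_" are stripped.
username_replace_chars = {" ": "", "_": ""}

usernames = [
    "some_artist  # check this one weekly",  # hypothetical entry with a comment
    "# a whole-line comment",
    "another artist",
]

for raw in usernames:
    username = raw.split("#")[0].translate(str.maketrans(username_replace_chars))
    if username != "":
        print(username)  # prints "someartist" and "anotherartist"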
@@ -72,7 +72,10 @@ parser.add_argument(
 Folder-Name-Here",
 )
 parser.add_argument(
-    "-s", "--start", default=1, help="page number to start from",
+    "-s",
+    "--start",
+    default=1,
+    help="page number to start from",
 )
 parser.add_argument(
     "-S",
@@ -115,7 +118,7 @@ parser.add_argument(
 )
 parser.add_argument(
     "--download",
-    help="download a specific submission /view/12345678/",
+    help="download a specific submission by providing its id",
 )
 parser.add_argument(
     "-jd",
@@ -129,6 +132,11 @@ parser.add_argument(
     action="store_true",
     help="extract furaffinity cookies directly from your browser",
 )
+parser.add_argument(
+    "--index",
+    action="store_true",
+    help="create an index of downloaded files in an output folder",
+)

 args = parser.parse_args()

@@ -136,7 +144,7 @@ args = parser.parse_args()
 username = args.username
 category = args.category

-if username != None:
+if username is not None:
     username = username.split(" ")

 # Custom input
@@ -153,6 +161,7 @@ folder = args.folder

 login = args.login
 check = args.check
+index = args.index
 submissions = args.submissions
 json_description = args.json_description
 metadata = args.metadata
@@ -168,3 +177,26 @@ END = "\033[0m"

 # Globals
 BASE_URL = "https://www.furaffinity.net"
+username_replace_chars = {
+    " ": "",
+    "_": "",
+}
+search = 'YCH[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
+|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
+|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE\
+|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
+|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*ABLE\
+|AVAIL[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
+|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLONE\
+|CLONE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
+|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*LIM\
+|LIM[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
+|COM[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
+|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*COM\
+|COM[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE[^r]\
+|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*COM\
+|FIX[a-z $-/:-?{-~!"^_`\\[\\]]*ICE\
+|TELEGRAM[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
+|TG[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
+|REM[insder]*\\b\
+|\\bREF|\\bSale|auction|multislot|stream|adopt'
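For context, the search pattern added above is what check_filter() (in Modules.functions, further down in this diff) runs against submission titles with re.IGNORECASE. A quick illustration, trimmed to a single alternative of the full pattern; the example titles are invented:

import re

# Trimmed version of config.search, for demonstration only.
search = 'YCH[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN'

for title in ('YCH open - 2 slots', 'Commission for a friend'):
    match = re.search(search, title, re.IGNORECASE)
    print(title, '->', 'filtered' if match is not None else 'kept')
# YCH open - 2 slots -> filtered
# Commission for a friend -> kept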
@@ -1,12 +1,16 @@
+import http.cookiejar as cookielib
 import json
-from tqdm import tqdm
-from pathvalidate import sanitize_filename
-import Modules.config as config
 import os

 import requests
 from bs4 import BeautifulSoup
-import http.cookiejar as cookielib
-from Modules.functions import system_message_handler, check_filter, download_complete
+from pathvalidate import sanitize_filename
+from tqdm import tqdm
+
+import Modules.config as config
+from Modules.functions import download_complete
+from Modules.functions import requests_retry_session
+from Modules.functions import system_message_handler
+
 session = requests.session()
 if config.cookies is not None: # add cookies if present
@@ -14,8 +18,10 @@ if config.cookies is not None: # add cookies if present
     cookies.load()
     session.cookies = cookies
+

 def download(path):
-    response = session.get(f"{config.BASE_URL}{path}")
+
+    response = requests_retry_session(session=session).get(f"{config.BASE_URL}{path}")
     s = BeautifulSoup(response.text, "html.parser")

     # System messages
@@ -23,20 +29,47 @@ def download(path):
         system_message_handler(s)

     image = s.find(class_="download").find("a").attrs.get("href")
-    title = s.find(class_="submission-title").find("p").contents[0]
-    title = sanitize_filename(title)
-    dsc = s.find(class_="submission-description").text.strip().replace("\r\n", "\n")
+    filename = sanitize_filename(image.split("/")[-1:][0])

+    author = s.find(class_="submission-id-sub-container").find("a").find("strong").text
+    title = sanitize_filename(s.find(class_="submission-title").find("p").contents[0])
+    view_id = int(path.split("/")[-2:-1][0])
+
+    output = f"{config.output_folder}/{author}"
+    rating = s.find(class_="rating-box").text.strip()
+
+    if config.category != "gallery":
+        output = f"{config.output_folder}/{author}/{config.category}"
+    if config.folder is not None:
+        output = f"{config.output_folder}/{author}/{config.folder}"
+    os.makedirs(output, exist_ok=True)
+
+    output_path = f"{output}/{title} ({view_id}) - {filename}"
+    output_path_fb = f"{output}/{title} - {filename}"
+    if config.rating is True:
+        os.makedirs(f"{output}/{rating}", exist_ok=True)
+        output_path = f"{output}/{rating}/{title} ({view_id}) - {filename}"
+        output_path_fb = f"{output}/{rating}/{title} - {filename}"
+
+    if config.dont_redownload is True and os.path.isfile(output_path_fb):
+        return file_exists_fallback(author, title)
+
+    image_url = f"https:{image}"
+    download_file(
+        image_url,
+        output_path,
+        f"{title} - \
+[{rating}]",
+    )
+
+    if config.metadata is True:
+        dsc = s.find(class_="submission-description").text.strip().replace("\r\n", "\n")
     if config.json_description is True:
         dsc = []
-    filename = image.split("/")[-1:][0]
     data = {
-        "id": int(path.split("/")[-2:-1][0]),
+        "id": view_id,
         "filename": filename,
-        "author": s.find(class_="submission-id-sub-container")
-        .find("a")
-        .find("strong")
-        .text,
+        "author": author,
         "date": s.find(class_="popup_date").attrs.get("title"),
         "title": title,
         "description": dsc,
@@ -48,53 +81,19 @@ def download(path):
         "gender": s.find(class_="info").findAll("div")[3].find("span").text,
         "views": int(s.find(class_="views").find(class_="font-large").text),
         "favorites": int(s.find(class_="favorites").find(class_="font-large").text),
-        "rating": s.find(class_="rating-box").text.strip(),
+        "rating": rating,
         "comments": [],
     }
-    if config.submission_filter is True and check_filter(title) is True:
-        print(
-            f'{config.WARN_COLOR}"{title}" was filtered and will not be \
-downloaded - {data.get("url")}{config.END}'
-        )
-        return True
-
-    image_url = f"https:{image}"
-    output = f"{config.output_folder}/{data.get('author')}"
-    if config.category != "gallery":
-        output = f"{config.output_folder}/{data.get('author')}/{config.category}"
-    if config.folder is not None:
-        output = f"{config.output_folder}/{data.get('author')}/{config.folder}"
-    os.makedirs(output, exist_ok=True)
-    filename = sanitize_filename(filename)
-    output_path = f"{output}/{title} - {filename}"
-    if config.rating is True:
-        os.makedirs(f'{output}/{data.get("rating")}', exist_ok=True)
-        output_path = f'{output}/{data.get("rating")}/{title} - {filename}'
-
-    if config.dont_redownload is True and os.path.isfile(output_path):
-        if config.check is True:
-            print(
-                f"{config.SUCCESS_COLOR}Downloaded all recent files of \"{data.get('author')}\"{config.END}"
-            )
-            raise download_complete
-        print(
-            f'{config.WARN_COLOR}Skipping "{title}" since it\'s already downloaded{config.END}'
-        )
-        return True
-    else:
-        download_file(
-            image_url,
-            output_path,
-            f'{title} - \
-[{data.get("rating")}]',
-        )
-
-    if config.metadata is True:
-        create_metadata(output, data, s, title, filename)
+    create_metadata(output, data, s, title, filename)
     if config.download is not None:
-        print(f'{config.SUCCESS_COLOR}File saved as "{output_path}" {config.END}')
+        print(
+            f'{config.SUCCESS_COLOR}File saved as \
+"{output_path}" {config.END}'
+        )
+
     return True


 def download_file(url, fname, desc):
     try:
         r = session.get(url, stream=True)
@@ -122,6 +121,7 @@ def download_file(url, fname, desc):
         exit()
     return True
+


 def create_metadata(output, data, s, title, filename):
     if config.rating is True:
         os.makedirs(f'{output}/{data.get("rating")}/metadata', exist_ok=True)
@@ -164,3 +164,17 @@ def create_metadata(output, data, s, title, filename):
     # Write a UTF-8 encoded JSON file for metadata
     with open(f"{metadata}.json", "w", encoding="utf-8") as f:
         json.dump(data, f, ensure_ascii=False, indent=4)
+
+
+def file_exists_fallback(author, title):
+    if config.check is True:
+        print(
+            f'fallback: {config.SUCCESS_COLOR}Downloaded all recent files of \
+"{author}"{config.END}'
+        )
+        raise download_complete
+    print(
+        f'fallback: {config.WARN_COLOR}Skipping "{title}" since \
+it\'s already downloaded{config.END}'
+    )
+    return True
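A rough sketch of the new naming scheme used by download() above (all values below are invented): the submission id is now embedded in the file name, while an id-less fallback path is still checked so files saved under the previous naming scheme are detected and not re-downloaded.

# Hypothetical values, for illustration only.
output = "Submissions/some_artist"
title = "Some picture"
view_id = 12345678
filename = "1234567890.some_artist_picture.png"

output_path = f"{output}/{title} ({view_id}) - {filename}"  # new scheme, with submission id
output_path_fb = f"{output}/{title} - {filename}"           # old scheme, used as a fallback check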
@@ -4,6 +4,8 @@ import re
 import browser_cookie3
 import requests
 from bs4 import BeautifulSoup
+from requests.adapters import HTTPAdapter
+from urllib3.util import Retry

 import Modules.config as config

@@ -13,39 +15,43 @@ if config.cookies is not None: # add cookies if present
     cookies.load()
     session.cookies = cookies

+session.headers.update({"User-Agent": config.user_agent})
+
+
+def requests_retry_session(
+    retries=3,
+    backoff_factor=0.3,
+    status_forcelist=(500, 502, 504, 104),
+    session=None,
+):
+    session = session or requests.Session()
+    retry = Retry(
+        total=retries,
+        read=retries,
+        connect=retries,
+        backoff_factor=backoff_factor,
+        status_forcelist=status_forcelist,
+    )
+    adapter = HTTPAdapter(max_retries=retry)
+    session.mount("http://", adapter)
+    session.mount("https://", adapter)
+    return session
+
+
 class download_complete(Exception):
     pass


 def check_filter(title):
-    search = 'YCH[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
-|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
-|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE\
-|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
-|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*ABLE\
-|AVAIL[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
-|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*CLONE\
-|CLONE[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
-|YCH[a-z $-/:-?{-~!"^_`\\[\\]]*LIM\
-|LIM[a-z $-/:-?{-~!"^_`\\[\\]]*YCH\
-|COM[a-z $-/:-?{-~!"^_`\\[\\]]*OPEN\
-|OPEN[a-z $-/:-?{-~!"^_`\\[\\]]*COM\
-|COM[a-z $-/:-?{-~!"^_`\\[\\]]*CLOSE[^r]\
-|CLOSE[a-z $-/:-?{-~!"^_`\\[\\]]*COM\
-|FIX[a-z $-/:-?{-~!"^_`\\[\\]]*ICE\
-|TELEGRAM[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
-|TG[a-z $-/:-?{-~!"^_`\\[\\]]*STICK\
-|REM[insder]*\\b\
-|\\bREF|\\bSale|auction|multislot|stream|adopt'
-
     match = re.search(
-        search,
+        config.search,
         title,
         re.IGNORECASE,
     )
     if match is not None and title == match.string:
         return True

     return None

@@ -68,9 +74,7 @@ def system_message_handler(s):
     raise download_complete


-def login(user_agent):
-
-    session.headers.update({"User-Agent": user_agent})
+def login():

     CJ = browser_cookie3.load()

@@ -103,8 +107,6 @@ by using "-c cookies.txt"{config.END}'
 furaffinity in your browser, or you can export cookies.txt manually{config.END}"
         )

-    exit()
-

 def next_button(page_url):
     response = session.get(page_url)
@@ -130,15 +132,17 @@ def next_button(page_url):
             raise download_complete
         page_num = next_button.parent.attrs["action"].split("/")[-2]
     else:
+        next_button = s.find("a", class_="button standard right", text="Next")
         page_num = fav_next_button(s)
-    print(f"Downloading page {page_num} - {page_url}")
+    print(
+        f"Downloading page {page_num} - {config.BASE_URL}/{next_button.parent.attrs['action']}"
+    )
     return page_num


-def fav_next_button(s):
+def fav_next_button():
     # unlike galleries that are sequentially numbered, favorites use a different scheme.
     # the "page_num" is instead: [set of numbers]/next (the trailing /next is required)
-    next_button = s.find("a", class_="button standard right", text="Next")
     if next_button is None:
         print(f"{config.WARN_COLOR}Unable to find next button{config.END}")
         raise download_complete
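The requests_retry_session() helper added above wraps a requests session with urllib3's Retry through an HTTPAdapter, so transient server errors are retried with increasing backoff instead of failing on the first attempt. A minimal usage sketch (the URL is only an example):

import requests

from Modules.functions import requests_retry_session

session = requests.session()
# Retry behaviour is configured inside the helper (retries=3, backoff_factor=0.3,
# status_forcelist=(500, 502, 504, 104)); the call site stays a one-liner.
response = requests_retry_session(session=session).get("https://www.furaffinity.net")
print(response.status_code)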
Modules/index.py (new file, 37 lines added)
@@ -0,0 +1,37 @@
+import contextlib
+import re
+from pathlib import Path
+
+import Modules.config as config
+
+
+def start_indexing(path, layer=0):
+    """Recursively iterate over each item in path
+    and print item's name.
+    """
+
+    # make Path object from input string
+    path = Path(path)
+    with open(f"{config.output_folder}/index.idx", encoding="utf-8", mode="a+") as idx:
+
+        # iter the directory
+        for p in path.iterdir():
+
+            if p.is_file():
+                idx.write(f"{p}\n")
+
+            elif p.is_dir():
+                start_indexing(p, layer + 1)
+
+            else:
+                raise FileNotFoundError()
+
+
+def check_file(path):
+    view_id = path.split("/")[-2:-1][0]
+    with contextlib.suppress(FileNotFoundError):
+        with open(f"{config.output_folder}/index.idx", encoding="utf-8") as idx:
+            index = idx.read()
+            match = re.search(view_id, index)
+            if match is not None:
+                return True
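Usage-wise, this new module is driven from the main script (see the hunks below): start_indexing() walks the output folder and writes every file path into index.idx, and check_file() then answers "already downloaded?" by searching that single file for the submission id rather than probing the filesystem per submission. A small sketch, assuming the package is importable and using a made-up submission id:

import Modules.config as config
from Modules.index import check_file, start_indexing

start_indexing(config.output_folder)        # (re)build index.idx from files on disk
if check_file("/view/12345678/") is True:   # hypothetical submission path
    print("already downloaded, skipping")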
@@ -9,10 +9,14 @@ from bs4 import BeautifulSoup

 import Modules.config as config
 from Modules.download import download
+from Modules.functions import check_filter
 from Modules.functions import download_complete
 from Modules.functions import login
 from Modules.functions import next_button
+from Modules.functions import requests_retry_session
 from Modules.functions import system_message_handler
+from Modules.index import check_file
+from Modules.index import start_indexing

 # get session
 session = requests.session()
@@ -31,12 +35,13 @@ def main():
     while True:
         if config.stop == page_num:
             print(
-                f'{config.WARN_COLOR}Reached page "{config.stop}", stopping.{config.END}'
+                f'{config.WARN_COLOR}Reached page "{config.stop}", \
+stopping.{config.END}'
             )
             break

         page_url = f"{download_url}/{page_num}"
-        response = session.get(page_url)
+        response = requests_retry_session(session=session).get(page_url)
         s = BeautifulSoup(response.text, "html.parser")

         # System messages
@@ -50,7 +55,30 @@ def main():

         # Download all images on the page
         for img in s.findAll("figure"):
-            download(img.find("a").attrs.get("href"))
+            title = img.find("figcaption").contents[0].text
+            img_url = img.find("a").attrs.get("href")
+
+            if config.submission_filter is True and check_filter(title) is True:
+                print(
+                    f'{config.WARN_COLOR}"{title}" was filtered and will not be \
+downloaded - {config.BASE_URL}{img_url}{config.END}'
+                )
+                continue
+
+            if config.dont_redownload is True and check_file(img_url) is True:
+                if config.check is True:
+                    print(
+                        f'{config.SUCCESS_COLOR}Downloaded all recent files of \
+"{config.username[0]}"{config.END}'
+                    )
+                    raise download_complete
+                print(
+                    f'{config.WARN_COLOR}Skipping "{title}" since \
+it\'s already downloaded{config.END}'
+                )
+                continue
+
+            download(img_url)
             sleep(config.interval)

         page_num = next_button(page_url)
@@ -58,13 +86,18 @@ def main():

 if __name__ == "__main__":
     if config.login is True:
-        login(config.user_agent)
+        login()
+        exit()
+
+    if config.index is True:
+        if os.path.isfile(f"{config.output_folder}/index.idx"):
+            os.remove(f"{config.output_folder}/index.idx")
+        start_indexing(config.output_folder)
+        print(f"{config.SUCCESS_COLOR}indexing finished{config.END}")
+        exit()

     try:
-        response = session.get(config.BASE_URL)
-    except ConnectionError:
-        print(f"{config.ERROR_COLOR}Connection failed{config.END}")
-        exit()
+        response = requests_retry_session(session=session).get(config.BASE_URL)
     except KeyboardInterrupt:
         print(f"{config.WARN_COLOR}Aborted by user{config.END}")
         exit()
@@ -72,14 +105,18 @@ if __name__ == "__main__":
     s = BeautifulSoup(response.text, "html.parser")
     if s.find(class_="loggedin_user_avatar") is not None:
         account_username = s.find(class_="loggedin_user_avatar").attrs.get("alt")
-        print(f'{config.SUCCESS_COLOR}Logged in as "{account_username}"{config.END}')
+        print(
+            f'{config.SUCCESS_COLOR}Logged in as \
+"{account_username}"{config.END}'
+        )
     else:
         print(
-            f"{config.WARN_COLOR}Not logged in, NSFW content is inaccessible{config.END}"
+            f"{config.WARN_COLOR}Not logged in, NSFW content \
+is inaccessible{config.END}"
         )

     if config.download is not None:
-        download(config.download)
+        download(f"/view/{config.download}/")
         exit()

     if config.submissions is True:
@@ -109,11 +146,22 @@ downloading "{config.folder[1]}"{config.END}'
         )
         exit()

+    try:
     if os.path.exists(config.username[0]):
         data = open(config.username[0]).read()
         config.username = filter(None, data.split("\n"))
+    except TypeError or AttributeError:
+        print(
+            f"{config.ERROR_COLOR}Please enter a username \
+or provide a file with usernames (1 username per line){config.END}"
+        )
+        exit()

     for username in config.username:
+        username = username.split("#")[0].translate(
+            str.maketrans(config.username_replace_chars)
+        )
+        if username != "":
         print(f'{config.SUCCESS_COLOR}Now downloading "{username}"{config.END}')
         download_url = f"{config.BASE_URL}/{config.category}/{username}"
         main()
@@ -121,3 +169,6 @@ downloading "{config.folder[1]}"{config.END}'
         f'{config.SUCCESS_COLOR}Finished \
downloading "{username}"{config.END}'
         )
+    if os.path.isfile(f"{config.output_folder}/index.idx"):
+        os.remove(f"{config.output_folder}/index.idx")
+    start_indexing(config.output_folder)