feat: add multiple link support, make slightly faster and more reliable, migrate to uv

This commit is contained in:
h
2026-02-17 01:28:10 +01:00
parent c4c8847686
commit ba0e23af4a
9 changed files with 831 additions and 128 deletions

View File

@@ -0,0 +1,33 @@
import asyncio
import sys
from argparse import ArgumentParser
from .downloader import download_file
def main() -> None:
    """CLI entry point: download every URL given on the command line.

    URLs may be passed as separate arguments or as whitespace-separated
    groups inside a single argument; all are downloaded concurrently.
    Exits with status 1 if any download failed, so shell scripts can
    detect partial failures.
    """
    parser = ArgumentParser()
    parser.add_argument("urls", nargs="+", help="URLs to download")
    parser.add_argument(
        "-n",
        "--num-parts",
        type=int,
        default=20,
        help="Number of parts to split the download into",
    )
    args = parser.parse_args()
    # Accept both separate arguments and a single quoted "url1 url2" blob.
    urls = [url for arg in args.urls for url in arg.split()]

    async def _run_all() -> bool:
        """Run all downloads concurrently; return True iff all succeeded."""
        results = await asyncio.gather(
            *[
                download_file(url, args.num_parts, position=i)
                for i, url in enumerate(urls)
            ],
            # Collect failures instead of cancelling sibling downloads.
            return_exceptions=True,
        )
        all_ok = True
        for url, result in zip(urls, results, strict=True):
            if isinstance(result, BaseException):
                all_ok = False
                print(f"Error downloading {url}: {result}", file=sys.stderr)  # noqa: T201
        return all_ok

    if not asyncio.run(_run_all()):
        # Report the failure through the process exit status.
        raise SystemExit(1)

View File

@@ -0,0 +1,4 @@
# Executed for ``python -m <package>``: delegate to the package's CLI entry point.
from . import main

if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,131 @@
import asyncio
import re
from mimetypes import guess_extension as extension
from pathlib import Path
from urllib.parse import unquote
import aiohttp
from tqdm import tqdm
# Bytes read from the response stream per iteration (64 KiB).
CHUNK_READ_SIZE = 65536
# Per-chunk retry policy: up to MAX_RETRIES attempts, sleeping
# RETRY_DELAYS[attempt] seconds between consecutive attempts.
MAX_RETRIES = 3
RETRY_DELAYS = [1, 2, 4]
async def fetch_chunk(  # noqa: PLR0913
    session: aiohttp.ClientSession,
    url: str,
    start_byte: int,
    end_byte: int,
    filepath: Path,
    pbar: tqdm,
) -> None:
    """Download bytes ``start_byte``..``end_byte`` of *url* into *filepath*.

    Seeks to the chunk's offset in the pre-allocated file and writes the
    ranged response body, advancing *pbar* as data arrives.  Transient
    network errors are retried up to MAX_RETRIES times with increasing
    delays; the final failure is re-raised to the caller.

    Raises:
        aiohttp.ClientError: on a non-success HTTP status or a network
            failure that persists through all retries.
        TimeoutError: if the request keeps timing out.
    """
    for attempt in range(MAX_RETRIES):
        bytes_written = 0
        try:
            headers = {"Range": f"bytes={start_byte}-{end_byte}"}
            async with session.get(url, headers=headers) as response:
                # Fail fast on 4xx/5xx instead of writing an error body
                # into the output file; the raised ClientResponseError is
                # a ClientError, so the retry logic below still applies.
                response.raise_for_status()
                with filepath.open("r+b") as f:
                    f.seek(start_byte)
                    while True:
                        chunk = await response.content.read(CHUNK_READ_SIZE)
                        if not chunk:
                            break
                        f.write(chunk)
                        bytes_written += len(chunk)
                        pbar.update(len(chunk))
        except (TimeoutError, aiohttp.ClientError):
            # Roll the progress bar back so a retry does not double-count.
            pbar.update(-bytes_written)
            if attempt < MAX_RETRIES - 1:
                await asyncio.sleep(RETRY_DELAYS[attempt])
            else:
                raise
        else:
            return
async def fetch_single_stream(
    session: aiohttp.ClientSession, url: str, filepath: Path, pbar: tqdm,
) -> None:
    """Download *url* into *filepath* with a single, un-ranged GET.

    Fallback path for servers without byte-range support: streams the
    body in CHUNK_READ_SIZE pieces and advances *pbar* per chunk.

    Raises:
        aiohttp.ClientError: on a non-success HTTP status or network error.
    """
    async with session.get(url) as response:
        # Fail fast on 4xx/5xx instead of saving an error page as the file.
        response.raise_for_status()
        with filepath.open("wb") as f:
            while chunk := await response.content.read(CHUNK_READ_SIZE):
                f.write(chunk)
                pbar.update(len(chunk))
def get_filename(response: aiohttp.ClientResponse) -> str:
    """Derive an output filename for *response*.

    Preference order:
    1. ``Content-Disposition`` header -- both quoted (``filename="a b.txt"``)
       and unquoted (``filename=a.txt``) forms are accepted.
    2. The last path segment of the final URL, if it already carries a
       short (2-4 character) extension.
    3. That URL segment plus an extension guessed from ``Content-Type``.

    Falls back to ``"download"`` when the URL has no usable path segment.
    """
    headers = response.headers
    disposition = headers.get("content-disposition", "")
    if "filename" in disposition:
        # Try the quoted form first, then the bare token form.
        match = re.search(r'filename="([^"]+)"', disposition) or re.search(
            r"filename=([^;\s]+)", disposition,
        )
        if match:
            return unquote(match.group(1))
    # Strip the query string; keep only the last path segment.
    url = str(response.url).split("?")[0]
    filename = url.rstrip("/").split("/")[-1]
    if not filename:
        return "download"
    # Segment already looks like "name.ext" with a 2-4 char extension.
    if re.findall(r"\.[a-zA-Z]{2}\w{0,2}$", filename):
        return unquote(filename)
    content_type = headers.get("Content-Type", "")
    ct_match = re.findall(r"([a-z]{4,11}/[\w+\-.]+)", content_type)
    if ct_match:
        # Guess a suffix from the MIME type; call extension() only once.
        suffix = extension(ct_match[0])
        if suffix:
            filename += suffix
    return unquote(filename)
async def download_file(url: str, num_parts: int = 20, *, position: int = 0) -> None:
    """Download *url* into the current directory, named via get_filename().

    Probes the server with a HEAD request first.  If it advertises byte
    ranges (``Accept-Ranges: bytes`` plus a ``Content-Length``), the file
    is pre-allocated and fetched as *num_parts* concurrent ranged GETs;
    otherwise it falls back to one sequential stream.  Data is written to
    a ``<name>.fastdl`` temp file that is renamed on completion.

    Args:
        url: The URL to download.
        num_parts: Number of concurrent range requests when supported.
        position: tqdm row index so multiple bars can stack per download.
    """
    async with aiohttp.ClientSession() as session:
        # HEAD (following redirects) to learn the filename, size, and
        # whether the server honors Range requests.
        async with session.head(url, allow_redirects=True) as response:
            filename = get_filename(response)
            content_length_str = response.headers.get("Content-Length")
            accept_ranges = response.headers.get("Accept-Ranges", "")
        supports_ranges = accept_ranges == "bytes" and content_length_str is not None
        temp_path = Path(filename + ".fastdl")
        if supports_ranges:
            content_length = int(content_length_str)
            chunk_size = content_length // num_parts
            # Pre-allocate the full file so chunk tasks can seek+write
            # at their own offsets independently.
            with temp_path.open("wb") as f:  # noqa: ASYNC230
                f.truncate(content_length)
            with tqdm(
                total=content_length, unit="B", unit_scale=True,
                desc=filename, position=position, leave=True,
            ) as pbar:
                tasks = []
                for i in range(num_parts):
                    start_byte = i * chunk_size
                    if i == num_parts - 1:
                        # Last part absorbs the division remainder.
                        end_byte = content_length - 1
                    else:
                        end_byte = start_byte + chunk_size - 1
                    tasks.append(
                        fetch_chunk(
                            session, url, start_byte, end_byte, temp_path, pbar,
                        ),
                    )
                await asyncio.gather(*tasks)
        else:
            # Size may be unknown; tqdm shows a rate-only bar for total=None.
            total = int(content_length_str) if content_length_str else None
            with tqdm(
                total=total, unit="B", unit_scale=True,
                desc=filename, position=position, leave=True,
            ) as pbar:
                await fetch_single_stream(session, url, temp_path, pbar)
    # Only a fully completed download reaches the final name.
    temp_path.rename(filename)