feat: add multiple link support, make slightly faster and more reliable, migrate to uv
This commit is contained in:
131
src/fastdownloader/downloader.py
Normal file
131
src/fastdownloader/downloader.py
Normal file
@@ -0,0 +1,131 @@
|
||||
import asyncio
|
||||
import re
|
||||
from mimetypes import guess_extension as extension
|
||||
from pathlib import Path
|
||||
from urllib.parse import unquote
|
||||
|
||||
import aiohttp
|
||||
from tqdm import tqdm
|
||||
|
||||
# Bytes pulled per read from the response stream (64 KiB).
CHUNK_READ_SIZE = 65536
# Attempts per chunk before the failure is propagated to the caller.
MAX_RETRIES = 3
# Seconds slept before retry attempts 1..MAX_RETRIES (exponential backoff).
RETRY_DELAYS = [1, 2, 4]
|
||||
|
||||
|
||||
async def fetch_chunk(  # noqa: PLR0913
    session: aiohttp.ClientSession,
    url: str,
    start_byte: int,
    end_byte: int,
    filepath: Path,
    pbar: tqdm,
) -> None:
    """Download the byte range [start_byte, end_byte] of *url* into *filepath*.

    The target file must already exist and be pre-sized; the chunk is written
    in place at its offset. Transient failures are retried up to MAX_RETRIES
    times with backoff from RETRY_DELAYS, rewinding the progress bar by the
    bytes the failed attempt had already counted. The last attempt re-raises.
    """
    for attempt in range(MAX_RETRIES):
        bytes_written = 0
        try:
            headers = {"Range": f"bytes={start_byte}-{end_byte}"}
            async with session.get(url, headers=headers) as response:
                # Fail fast on 4xx/5xx instead of writing the error body into
                # the file. ClientResponseError is a ClientError subclass, so
                # it goes through the same retry path as network failures.
                response.raise_for_status()
                with filepath.open("r+b") as f:
                    f.seek(start_byte)
                    while True:
                        chunk = await response.content.read(CHUNK_READ_SIZE)
                        if not chunk:
                            break
                        f.write(chunk)
                        bytes_written += len(chunk)
                        pbar.update(len(chunk))
        except (TimeoutError, aiohttp.ClientError):
            # Roll back this attempt's progress so the bar stays accurate
            # when the whole range is re-downloaded.
            pbar.update(-bytes_written)
            if attempt < MAX_RETRIES - 1:
                await asyncio.sleep(RETRY_DELAYS[attempt])
            else:
                raise
        else:
            return
|
||||
|
||||
|
||||
async def fetch_single_stream(
    session: aiohttp.ClientSession, url: str, filepath: Path, pbar: tqdm,
) -> None:
    """Download *url* into *filepath* as one sequential stream.

    Fallback path for servers that do not support HTTP range requests
    (or did not advertise a Content-Length).
    """
    async with session.get(url) as response:
        # Don't silently save an HTML error page as the downloaded file.
        response.raise_for_status()
        with filepath.open("wb") as f:
            while True:
                chunk = await response.content.read(CHUNK_READ_SIZE)
                if not chunk:
                    break
                f.write(chunk)
                pbar.update(len(chunk))
|
||||
|
||||
|
||||
def get_filename(response: "aiohttp.ClientResponse") -> str:
    """Derive a filename for the download from *response*.

    Preference order:
    1. A ``filename`` value from the Content-Disposition header
       (both the quoted ``filename="a b.txt"`` and bare ``filename=a.txt``
       forms are accepted).
    2. The last path segment of the URL, if it already ends in an extension.
    3. That segment plus an extension guessed from the Content-Type header.
    4. The literal name ``"download"`` when the URL has no path segment.
    """
    headers = response.headers

    disposition = headers.get("content-disposition", "")
    if "filename" in disposition:
        # Quoted form first; fall back to the bare token form, which the
        # old quoted-only regex silently ignored.
        match = re.search(r'filename="([^"]+)"', disposition) or re.search(
            r"filename=([^;\s]+)", disposition,
        )
        if match:
            return unquote(match.group(1))

    # Strip the query string and take the last path segment.
    url = str(response.url).split("?")[0]
    filename = url.rstrip("/").split("/")[-1]
    if not filename:
        return "download"
    # Keep the name as-is when it already looks like "name.ext".
    if re.search(r"\.[a-zA-Z]{2}\w{0,2}$", filename):
        return unquote(filename)

    # No extension in the URL: try to guess one from the MIME type.
    content_type = headers.get("Content-Type", "")
    ct_match = re.search(r"[a-z]{4,11}/[\w+\-.]+", content_type)
    if ct_match:
        guessed = extension(ct_match.group(0))
        if guessed:
            filename += guessed
    return unquote(filename)
|
||||
|
||||
|
||||
async def download_file(url: str, num_parts: int = 20, *, position: int = 0) -> None:
    """Download *url* to the current directory, in parallel when possible.

    A HEAD request determines the filename and whether the server supports
    byte-range requests. If it does, the file is pre-allocated and fetched as
    up to ``num_parts`` concurrent ranges; otherwise it is streamed
    sequentially. Data is written to a ``.fastdl`` temp file that is renamed
    to the final name only once the download completes.

    Args:
        url: The URL to download.
        num_parts: Maximum number of concurrent range requests.
        position: tqdm row offset, for stacking the bars of parallel downloads.
    """
    async with aiohttp.ClientSession() as session:
        async with session.head(url, allow_redirects=True) as response:
            filename = get_filename(response)
            content_length_str = response.headers.get("Content-Length")
            accept_ranges = response.headers.get("Accept-Ranges", "")

        supports_ranges = accept_ranges == "bytes" and content_length_str is not None

        temp_path = Path(filename + ".fastdl")

        # Zero-length bodies go through the streaming path: a ranged request
        # for an empty file would produce the invalid header "bytes=0--1".
        if supports_ranges and (content_length := int(content_length_str)) > 0:
            # Never use more parts than there are bytes; otherwise integer
            # division gives chunk_size == 0 and invalid byte ranges for
            # files smaller than num_parts.
            parts = min(num_parts, content_length)
            chunk_size = content_length // parts

            with temp_path.open("wb") as f:  # noqa: ASYNC230
                # Pre-size the file so chunks can seek-and-write in place.
                f.truncate(content_length)

            with tqdm(
                total=content_length, unit="B", unit_scale=True,
                desc=filename, position=position, leave=True,
            ) as pbar:
                tasks = []
                for i in range(parts):
                    start_byte = i * chunk_size
                    # The last part absorbs the division remainder.
                    if i == parts - 1:
                        end_byte = content_length - 1
                    else:
                        end_byte = start_byte + chunk_size - 1
                    tasks.append(
                        fetch_chunk(
                            session, url, start_byte, end_byte, temp_path, pbar,
                        ),
                    )
                await asyncio.gather(*tasks)
        else:
            total = int(content_length_str) if content_length_str else None
            with tqdm(
                total=total, unit="B", unit_scale=True,
                desc=filename, position=position, leave=True,
            ) as pbar:
                await fetch_single_stream(session, url, temp_path, pbar)

        # Only now does the file appear under its real name, so a crashed or
        # interrupted download never masquerades as a complete one.
        temp_path.rename(filename)
|
||||
Reference in New Issue
Block a user