diff --git a/pyproject.toml b/pyproject.toml index bfe5f81..620448c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "fastdownloader" -version = "0.2.0" +version = "0.3.0" description = "Simple parallel file downloader" readme = "README.md" requires-python = ">=3.11" diff --git a/src/fastdownloader/downloader.py b/src/fastdownloader/downloader.py index 92376f1..ef88dbf 100644 --- a/src/fastdownloader/downloader.py +++ b/src/fastdownloader/downloader.py @@ -8,8 +8,9 @@ import aiohttp from tqdm import tqdm CHUNK_READ_SIZE = 65536 -MAX_RETRIES = 3 -RETRY_DELAYS = [1, 2, 4] +MAX_RETRIES = 5 +RETRY_DELAYS = [1, 2, 4, 8, 16] +_RETRYABLE = (TimeoutError, OSError, aiohttp.ClientError) async def fetch_chunk( # noqa: PLR0913 @@ -34,7 +35,7 @@ async def fetch_chunk( # noqa: PLR0913 f.write(chunk) bytes_written += len(chunk) pbar.update(len(chunk)) - except (TimeoutError, aiohttp.ClientError): + except _RETRYABLE: pbar.update(-bytes_written) if attempt < MAX_RETRIES - 1: await asyncio.sleep(RETRY_DELAYS[attempt]) @@ -47,14 +48,26 @@ async def fetch_chunk( # noqa: PLR0913 async def fetch_single_stream( session: aiohttp.ClientSession, url: str, filepath: Path, pbar: tqdm, ) -> None: - async with session.get(url) as response: - with filepath.open("wb") as f: - while True: - chunk = await response.content.read(CHUNK_READ_SIZE) - if not chunk: - break - f.write(chunk) - pbar.update(len(chunk)) + for attempt in range(MAX_RETRIES): + bytes_written = 0 + try: + async with session.get(url) as response: + with filepath.open("wb") as f: + while True: + chunk = await response.content.read(CHUNK_READ_SIZE) + if not chunk: + break + f.write(chunk) + bytes_written += len(chunk) + pbar.update(len(chunk)) + except _RETRYABLE: + pbar.update(-bytes_written) + if attempt < MAX_RETRIES - 1: + await asyncio.sleep(RETRY_DELAYS[attempt]) + else: + raise + else: + return def get_filename(response: aiohttp.ClientResponse) -> str: @@ -85,12 +98,31 @@ def get_filename(response: aiohttp.ClientResponse) -> str: return unquote(filename) +async def _head_with_retry( + session: aiohttp.ClientSession, url: str, +) -> tuple[str, str | None, str]: + for attempt in range(MAX_RETRIES): + try: + async with session.head(url, allow_redirects=True) as response: + return ( + get_filename(response), + response.headers.get("Content-Length"), + response.headers.get("Accept-Ranges", ""), + ) + except _RETRYABLE: + if attempt < MAX_RETRIES - 1: + await asyncio.sleep(RETRY_DELAYS[attempt]) + else: + raise + raise RuntimeError("unreachable") # pragma: no cover + + async def download_file(url: str, num_parts: int = 20, *, position: int = 0) -> None: - async with aiohttp.ClientSession() as session: - async with session.head(url, allow_redirects=True) as response: - filename = get_filename(response) - content_length_str = response.headers.get("Content-Length") - accept_ranges = response.headers.get("Accept-Ranges", "") + timeout = aiohttp.ClientTimeout(total=None, connect=30, sock_read=60) + async with aiohttp.ClientSession(timeout=timeout) as session: + filename, content_length_str, accept_ranges = await _head_with_retry( + session, url, + ) supports_ranges = accept_ranges == "bytes" and content_length_str is not None