From e5f158a475373d3fefc69c4f8a268ca8cef17ca5 Mon Sep 17 00:00:00 2001
From: BarsTiger
Date: Mon, 6 Nov 2023 15:47:23 +0200
Subject: [PATCH] Initial commit

---
Note: the content of fastdownloader/downloader.py below is not the content
blob cf146da was computed for, so the index line of that file is stale.

 .gitignore                   |   1 +
 README.md                    |   0
 fastdownloader/__init__.py   |  12 +++
 fastdownloader/__main__.py   |   5 ++
 fastdownloader/downloader.py | 164 +++++++++++++++++++++++++++++++++++
 pyproject.toml               |  19 ++++
 6 files changed, 201 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 fastdownloader/__init__.py
 create mode 100644 fastdownloader/__main__.py
 create mode 100644 fastdownloader/downloader.py
 create mode 100644 pyproject.toml

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c04bc49
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+poetry.lock
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/fastdownloader/__init__.py b/fastdownloader/__init__.py
new file mode 100644
index 0000000..2950686
--- /dev/null
+++ b/fastdownloader/__init__.py
@@ -0,0 +1,12 @@
+from .downloader import download_file
+from argparse import ArgumentParser
+from asyncio import run
+
+
+def main():
+    parser = ArgumentParser()
+    parser.add_argument('url', help='URL to download')
+    parser.add_argument('-n', '--num-parts', type=int, default=20,
+                        help='Number of parts to split the download into')
+    args = parser.parse_args()
+    run(download_file(args.url, args.num_parts))
diff --git a/fastdownloader/__main__.py b/fastdownloader/__main__.py
new file mode 100644
index 0000000..daf5509
--- /dev/null
+++ b/fastdownloader/__main__.py
@@ -0,0 +1,5 @@
+from . import main
+
+
+if __name__ == '__main__':
+    main()
diff --git a/fastdownloader/downloader.py b/fastdownloader/downloader.py
new file mode 100644
index 0000000..cf146da
--- /dev/null
+++ b/fastdownloader/downloader.py
@@ -0,0 +1,164 @@
+"""Download a file over HTTP in several concurrent byte-range parts."""
+import aiohttp
+import asyncio
+import aiofiles
+
+import os
+import re
+
+from tqdm import tqdm
+
+from urllib.parse import unquote
+from mimetypes import guess_extension as extension
+
+# Upper bound on the bytes handled per read while streaming data.
+IO_BLOCK_SIZE = 256 * 1024
+
+# Captures the value of a quoted or unquoted ``filename=`` parameter.
+FILENAME_PARAM = re.compile(r'filename=(?:"([^"]+)"|([^;\s]+))')
+
+
+async def fetch_chunk(
+        session: aiohttp.ClientSession,
+        url: str,
+        start_byte: int,
+        end_byte: int,
+        part_num: int,
+        pbar: tqdm
+):
+    """Download one byte range of ``url`` into the file ``part_{part_num}``.
+
+    ``start_byte`` and ``end_byte`` are both inclusive, as in the HTTP
+    ``Range`` header. ``pbar`` is advanced by the number of bytes written.
+
+    Raises:
+        aiohttp.ClientResponseError: the response status is 400 or higher.
+        RuntimeError: a part that does not start at byte 0 was answered with
+            a status other than 206, i.e. the range was not applied.
+    """
+    headers = {'Range': f'bytes={start_byte}-{end_byte}'}
+    async with session.get(url, headers=headers) as response:
+        response.raise_for_status()
+        # Without a 206 Partial Content reply the body is not the requested
+        # slice; that is only acceptable when the slice starts at byte 0.
+        if start_byte > 0 and response.status != 206:
+            raise RuntimeError(
+                f'Range request not honoured (HTTP {response.status})')
+        async with aiofiles.open(f'part_{part_num}', 'wb') as f:
+            async for chunk in response.content.iter_chunked(IO_BLOCK_SIZE):
+                await f.write(chunk)
+                pbar.update(len(chunk))
+
+
+async def merge_files(filename, num_parts, pbar: tqdm):
+    """Concatenate ``part_0`` .. ``part_{num_parts - 1}`` into ``filename``.
+
+    Each part file is deleted once it has been copied. ``pbar`` is relabelled
+    "Merging", reset, and advanced by the number of bytes copied.
+    """
+    pbar.set_description("Merging")
+    pbar.reset(total=pbar.total)
+    async with aiofiles.open(filename, 'wb') as f_out:
+        for i in range(num_parts):
+            async with aiofiles.open(f'part_{i}', 'rb') as f_in:
+                # Copy block by block so that a large part is never held in
+                # memory all at once.
+                while block := await f_in.read(IO_BLOCK_SIZE):
+                    await f_out.write(block)
+                    pbar.update(len(block))
+            os.remove(f'part_{i}')
+
+
+def get_filename(response: aiohttp.ClientResponse):
+    """Choose a local file name for the resource described by ``response``.
+
+    The name comes from the ``Content-Disposition`` header when it carries a
+    ``filename=`` parameter, otherwise from the last path segment of the
+    response URL. A URL-derived name without a recognisable extension gets
+    one guessed from ``Content-Type`` when possible.
+
+    The result is percent-decoded and reduced to its final path component,
+    so a server-supplied name cannot carry directory components.
+    """
+    headers = response.headers
+
+    disposition = headers.get("content-disposition", "")
+    match = FILENAME_PARAM.search(disposition)
+    if match:
+        filename = match.group(1) or match.group(2)
+    else:
+        url = str(response.url).split("?")[0]
+        filename = url.rstrip("/").split("/")[-1]
+        if not re.findall(r'\.[a-zA-Z]{2}\w{0,2}$', filename):
+            # No extension in the URL: try to derive one from the MIME type.
+            mime = re.findall(r'([a-z]{4,11}/[\w+\-.]+)',
+                              headers.get("Content-Type", ""))
+            if mime and extension(mime[0]):
+                filename += extension(mime[0])
+
+    # The name is server-controlled, so keep only its last path component.
+    return os.path.basename(unquote(filename)) or "download"
+
+
+async def download_file(url, num_parts=20):
+    """Download ``url`` to the current directory in parallel byte-range parts.
+
+    A HEAD request supplies the file name and total size; the byte range is
+    then split into up to ``num_parts`` slices that are fetched concurrently
+    into ``part_N`` files and finally merged. If a transfer fails, the other
+    transfers are cancelled and the part files are removed.
+
+    Raises:
+        ValueError: ``num_parts`` is smaller than 1.
+        RuntimeError: the HEAD response has no ``Content-Length`` header.
+    """
+    if num_parts < 1:
+        raise ValueError("num_parts must be at least 1")
+
+    async with aiohttp.ClientSession() as session:
+        async with session.head(url, allow_redirects=True) as response:
+            response.raise_for_status()
+            filename = get_filename(response)
+            if 'Content-Length' not in response.headers:
+                raise RuntimeError(
+                    "Server did not report Content-Length; cannot split")
+            content_length = int(response.headers['Content-Length'])
+
+        if content_length == 0:
+            # Nothing to fetch, and no valid byte range to request.
+            async with aiofiles.open(filename, 'wb'):
+                pass
+            return
+
+        # Never use more parts than there are bytes: the slice size would
+        # round down to zero and the computed ranges would be invalid.
+        num_parts = min(num_parts, content_length)
+        chunk_size = content_length // num_parts
+
+        with tqdm(total=content_length, unit="B", unit_scale=True,
+                  desc="Downloading") as pbar:
+            tasks = []
+            for i in range(num_parts):
+                start_byte = i * chunk_size
+                if i == num_parts - 1:
+                    # Range ends are inclusive; the last part also takes the
+                    # remainder left over by the integer division.
+                    end_byte = content_length - 1
+                else:
+                    end_byte = start_byte + chunk_size - 1
+                tasks.append(asyncio.create_task(
+                    fetch_chunk(session, url, start_byte, end_byte, i, pbar)))
+
+            try:
+                await asyncio.gather(*tasks)
+            except BaseException:
+                # Stop the remaining transfers and wait for them to settle
+                # before deleting their partial output.
+                for task in tasks:
+                    task.cancel()
+                await asyncio.gather(*tasks, return_exceptions=True)
+                for i in range(num_parts):
+                    if os.path.exists(f'part_{i}'):
+                        os.remove(f'part_{i}')
+                raise
+            await merge_files(filename, num_parts, pbar)
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..8c465a1
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,19 @@
+[tool.poetry]
+name = "fastdownloader"
+version = "0.1.0"
+description = ""
+authors = ["BarsTiger "]
+readme = "README.md"
+
+[tool.poetry.dependencies]
+python = "^3.11"
+aiohttp = "^3.8.6"
+aiofiles = "^23.2.1"
+tqdm = "^4.66.1"
+
+[tool.poetry.scripts]
+fastdl = "fastdownloader:main"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"