Initial commit

This commit is contained in:
BarsTiger
2023-11-06 15:47:23 +02:00
commit e5f158a475
6 changed files with 127 additions and 0 deletions

View File

@@ -0,0 +1,12 @@
from .downloader import download_file
from argparse import ArgumentParser
from asyncio import run
def main():
    """Command-line entry point: parse the URL and part count, then drive the async download."""
    arg_parser = ArgumentParser()
    arg_parser.add_argument('url', help='URL to download')
    arg_parser.add_argument(
        '-n', '--num-parts', type=int, default=20,
        help='Number of parts to split the download into')
    options = arg_parser.parse_args()
    # Bridge from the synchronous CLI into the async downloader.
    run(download_file(options.url, options.num_parts))

View File

@@ -0,0 +1,5 @@
from . import main
# Allow running the package directly, e.g. `python -m <package> <url>`.
if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,90 @@
import aiohttp
import asyncio
import aiofiles
import os
import re
from tqdm import tqdm
from urllib.parse import unquote
from mimetypes import guess_extension as extension
async def fetch_chunk(
    session: "aiohttp.ClientSession",
    url: str,
    start_byte: int,
    end_byte: int,
    part_num: int,
    pbar: "tqdm"
):
    """Download one byte range of *url* into a temporary ``part_<part_num>`` file.

    Issues a ranged GET (``bytes=start_byte-end_byte``) on *session*, streams
    the body to disk in 1 KiB chunks, and advances *pbar* by the number of
    bytes written.

    Raises:
        aiohttp.ClientResponseError: if the server answers with an error
            status (previously the error body was silently written to disk).
    """
    headers = {'Range': f'bytes={start_byte}-{end_byte}'}
    async with session.get(url, headers=headers) as response:
        # Fail fast on 4xx/5xx instead of corrupting the part file.
        response.raise_for_status()
        # Use aiofiles (as merge_files already does) so synchronous disk
        # writes do not block the event loop and stall the sibling tasks.
        async with aiofiles.open(f'part_{part_num}', 'wb') as f:
            while True:
                chunk = await response.content.read(1024)
                if not chunk:
                    break
                await f.write(chunk)
                pbar.update(len(chunk))
async def merge_files(filename, num_parts, pbar: tqdm):
    """Concatenate the ``part_0`` .. ``part_<num_parts-1>`` files into *filename*.

    Re-purposes *pbar* for the merge phase, then appends each part in order
    and deletes it once its bytes have been copied.
    """
    pbar.set_description("Merging")
    # Restart the bar at zero while keeping the same total for the merge pass.
    pbar.reset(total=pbar.total)
    async with aiofiles.open(filename, 'wb') as output:
        for index in range(num_parts):
            part_name = f'part_{index}'
            async with aiofiles.open(part_name, 'rb') as part:
                data = await part.read()
                await output.write(data)
                pbar.update(len(data))
            os.remove(part_name)
def get_filename(response: "aiohttp.ClientResponse"):
    """Derive a filename for the download from *response*.

    Resolution order:
    1. ``filename="..."`` in the Content-Disposition header.
    2. The last path segment of the (query-stripped) URL, if it already
       carries a file extension.
    3. That segment plus an extension guessed from the Content-Type header.

    The result is always percent-decoded with ``unquote``.
    Only ``response.headers`` and ``response.url`` are read.
    """
    headers = response.headers
    disposition = headers.get("content-disposition", "")
    if "filename" in disposition:
        match = re.match(r'.*filename=\"(.+)\".*', disposition)
        # Fall through to the URL heuristics if the quoted form is absent
        # (the original called .group(1) on a possible None).
        if match:
            return unquote(match.group(1))
    url = str(response.url).split("?")[0]
    filename = url.rstrip("/").split("/")[-1]
    if re.findall(r'\.[a-zA-Z]{2}\w{0,2}$', filename):
        return unquote(filename)
    # No extension in the URL: try to guess one from Content-Type.
    # Guard both the header lookup and the regex result -- the original
    # indexed findall(...)[0] on a possibly missing/unparsable header,
    # raising KeyError/IndexError.
    content_type = headers.get("Content-Type", "")
    matches = re.findall(r'([a-z]{4,11}/[\w+\-.]+)', content_type)
    if matches:
        guessed = extension(matches[0])
        if guessed:
            filename += guessed
    return unquote(filename)
async def download_file(url, num_parts=20):
    """Download *url* in up to *num_parts* parallel byte-range requests.

    Sends a HEAD request (following redirects) to learn the content length
    and pick a local filename, fans out one ranged GET per part, then merges
    the parts into the final file.

    Args:
        url: The resource to download. The server must report Content-Length
            and should honor Range requests.
        num_parts: Desired number of parallel parts (clamped to the file size).
    """
    async with aiohttp.ClientSession() as session:
        async with session.head(url, allow_redirects=True) as response:
            filename = get_filename(response)
            content_length = int(response.headers['Content-Length'])
        # Never use more parts than there are bytes: a zero chunk_size would
        # otherwise emit invalid Range headers such as 'bytes=0--1' for
        # files smaller than num_parts.
        num_parts = max(1, min(num_parts, content_length))
        chunk_size = content_length // num_parts
        with tqdm(total=content_length, unit="B", unit_scale=True,
                  desc="Downloading") as pbar:
            tasks = []
            for i in range(num_parts):
                start_byte = i * chunk_size
                if i == num_parts - 1:
                    # Last part absorbs the division remainder; Range ends
                    # are inclusive, so the final byte index is length - 1
                    # (the original overshot by one byte).
                    end_byte = content_length - 1
                else:
                    end_byte = start_byte + chunk_size - 1
                tasks.append(
                    fetch_chunk(session, url, start_byte, end_byte, i, pbar))
            await asyncio.gather(*tasks)
            await merge_files(filename, num_parts, pbar)