# fastdl/fastdownloader/downloader.py

import aiohttp
import asyncio
import aiofiles

import os
import re

from tqdm import tqdm

from urllib.parse import unquote
from mimetypes import guess_extension as extension


async def fetch_chunk(
        session: aiohttp.ClientSession,
        url: str,
        start_byte: int,
        end_byte: int,
        part_num: int,
        pbar: tqdm,
        chunk_size: int = 64 * 1024
):
    """Download one byte range of *url* into the file ``part_<part_num>``.

    Args:
        session: Open aiohttp session used to issue the GET request.
        url: Address of the resource to download.
        start_byte: First byte of the range (inclusive).
        end_byte: Last byte of the range (inclusive).
        part_num: Index used to name the output file ``part_<part_num>``.
        pbar: Progress bar advanced by the number of bytes written.
        chunk_size: Maximum number of bytes read from the socket and
            written to disk per iteration.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an error
            status, or answers a range that does not start at byte 0 with
            anything other than ``206 Partial Content``.
    """
    headers = {'Range': f'bytes={start_byte}-{end_byte}'}
    async with session.get(url, headers=headers) as response:
        # Without this an HTML error page would be saved as file content.
        response.raise_for_status()

        # A non-206 success means the server ignored the Range header and
        # is sending the whole body. That is only usable when this part
        # starts at byte 0; for any later part the bytes would be wrong.
        if response.status != 206 and start_byte > 0:
            raise aiohttp.ClientResponseError(
                response.request_info,
                response.history,
                status=response.status,
                message=f'server ignored Range request for part {part_num}',
                headers=response.headers,
            )

        # The part file is opened only after the status checks so a failed
        # request does not leave a bogus part behind. aiofiles keeps the
        # disk writes from blocking the event loop.
        async with aiofiles.open(f'part_{part_num}', 'wb') as f:
            async for chunk in response.content.iter_chunked(chunk_size):
                await f.write(chunk)
                pbar.update(len(chunk))


async def merge_files(filename, num_parts, pbar: tqdm,
                      buffer_size: int = 1024 * 1024):
    """Concatenate ``part_0`` .. ``part_<num_parts - 1>`` into *filename*.

    Each part file is deleted once it has been copied.

    Args:
        filename: Path of the merged output file (overwritten if present).
        num_parts: Number of part files to merge, in index order.
        pbar: Progress bar; relabelled "Merging", reset to zero with its
            total kept, then advanced by the bytes copied.
        buffer_size: Maximum number of bytes held in memory per copy step.
    """
    pbar.set_description("Merging")
    pbar.reset(total=pbar.total)
    async with aiofiles.open(filename, 'wb') as f_out:
        for i in range(num_parts):
            part_path = f'part_{i}'
            async with aiofiles.open(part_path, 'rb') as f_in:
                # Copy in bounded pieces instead of loading the whole part,
                # so memory use does not grow with the download size.
                while True:
                    block = await f_in.read(buffer_size)
                    if not block:
                        break
                    await f_out.write(block)
                    pbar.update(len(block))
            # Remove the part only after its handle has been closed.
            os.remove(part_path)


# Content-Disposition "filename" forms, tried in order: quoted string,
# bare token, then the extended form  filename*=charset'lang'value.
# The plain "filename=" forms come first so responses that carry both
# keep resolving to the same name as before.
_DISPOSITION_FILENAME_PATTERNS = (
    re.compile(r'filename\s*=\s*"([^"]+)"', re.IGNORECASE),
    re.compile(r'filename\s*=\s*([^;"\s]+)', re.IGNORECASE),
    re.compile(r"filename\*\s*=\s*[^']*'[^']*'([^;\s]+)", re.IGNORECASE),
)


def _local_name(raw):
    """Percent-decode *raw* and reduce it to a bare file name ('' if none)."""
    # The name comes from the remote server, so drop any directory part
    # (either slash style) to keep "../../x" from escaping the target
    # directory when the result is later opened for writing.
    name = os.path.basename(unquote(raw).replace("\\", "/"))
    return "" if name in (".", "..") else name


def get_filename(response: "aiohttp.ClientResponse"):
    """Choose a local file name for the resource described by *response*.

    The name is taken from the ``content-disposition`` header when it
    carries a usable ``filename``; otherwise from the last path segment of
    ``response.url`` (query string removed). If that segment has no
    2-4 character extension, one guessed from ``Content-Type`` is appended
    when available.

    Args:
        response: Object exposing ``headers`` (mapping) and ``url``.

    Returns:
        A percent-decoded file name without directory components;
        ``"download"`` if nothing usable could be derived.
    """
    headers = response.headers

    disposition = headers.get("content-disposition", "")
    for pattern in _DISPOSITION_FILENAME_PATTERNS:
        found = pattern.search(disposition)
        if found:
            name = _local_name(found.group(1))
            if name:
                return name

    # No usable header value: fall back to the URL path.
    url = str(response.url).split("?")[0]
    filename = url.rstrip("/").split("/")[-1]
    if not re.search(r'\.[a-zA-Z]{2}\w{0,2}$', filename):
        # Content-Type may be absent or malformed; both simply mean
        # "no extension to add" rather than an error.
        mime = re.search(r'[a-z]{4,11}/[\w+\-.]+',
                         headers.get("Content-Type", ""))
        suffix = extension(mime.group(0)) if mime else None
        if suffix:
            filename += suffix
    return _local_name(filename) or "download"


def _split_ranges(content_length, num_parts):
    """Return inclusive (start, end) byte ranges covering content_length bytes."""
    # Never plan more parts than there are bytes: an empty part would
    # yield an invalid header such as "bytes=0--1".
    parts = max(1, min(num_parts, content_length))
    chunk_size = content_length // parts
    ranges = []
    for i in range(parts):
        start = i * chunk_size
        if i == parts - 1:
            # Range ends are inclusive, so the final byte is length - 1;
            # the last part also absorbs the division remainder.
            end = content_length - 1
        else:
            end = start + chunk_size - 1
        ranges.append((start, end))
    return ranges


async def download_file(url, num_parts=20):
    """Download *url* as concurrent byte ranges and merge them into one file.

    A HEAD request supplies the output name (via ``get_filename``) and the
    ``Content-Length``. The length is split into up to *num_parts* ranges,
    each fetched with ``fetch_chunk`` into ``part_<i>``, and the parts are
    then joined by ``merge_files``.

    Args:
        url: Address of the resource to download.
        num_parts: Desired number of concurrent ranges (at least 1). Fewer
            are used when the resource is smaller than that many bytes or
            the server answers ``Accept-Ranges: none``.

    Raises:
        ValueError: If *num_parts* is less than 1.
        KeyError: If the HEAD response has no ``Content-Length`` header.
    """
    if num_parts < 1:
        raise ValueError("num_parts must be at least 1")

    async with aiohttp.ClientSession() as session:
        # Only the metadata of the HEAD response is needed, so it is
        # released before the ranged GET requests begin.
        async with session.head(url, allow_redirects=True) as response:
            filename = get_filename(response)
            content_length = int(response.headers['Content-Length'])
            accept_ranges = response.headers.get('Accept-Ranges', '')

        if content_length == 0:
            # Nothing to fetch and no valid byte range exists; just
            # create the empty output file.
            async with aiofiles.open(filename, 'wb'):
                pass
            return

        if accept_ranges.strip().lower() == 'none':
            # The server declared it does not serve ranges, so splitting
            # the download would not yield distinct parts.
            num_parts = 1

        ranges = _split_ranges(content_length, num_parts)

        with tqdm(total=content_length, unit="B", unit_scale=True,
                  desc="Downloading") as pbar:
            tasks = [
                fetch_chunk(session, url, start_byte, end_byte, i, pbar)
                for i, (start_byte, end_byte) in enumerate(ranges)
            ]
            await asyncio.gather(*tasks)
            # Merge exactly the parts that were planned, which may be
            # fewer than the requested num_parts.
            await merge_files(filename, len(ranges), pbar)