Initial commit

This commit is contained in:
BarsTiger
2023-11-06 15:47:23 +02:00
commit e5f158a475
6 changed files with 127 additions and 0 deletions

View File

@@ -0,0 +1,12 @@
from .downloader import download_file
from argparse import ArgumentParser
from asyncio import run
def main():
    """Command-line entry point: parse the URL and part count, then drive the async download."""
    arg_parser = ArgumentParser()
    arg_parser.add_argument('url', help='URL to download')
    arg_parser.add_argument(
        '-n', '--num-parts', type=int, default=20,
        help='Number of parts to split the download into')
    options = arg_parser.parse_args()
    # Bridge from the synchronous CLI into the async downloader.
    run(download_file(options.url, options.num_parts))

View File

@@ -0,0 +1,5 @@
from . import main
# Allow running the package directly, e.g. `python -m <package> <url>`.
if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,90 @@
import aiohttp
import asyncio
import aiofiles
import os
import re
from tqdm import tqdm
from urllib.parse import unquote
from mimetypes import guess_extension as extension
async def fetch_chunk(
    session: "aiohttp.ClientSession",
    url: str,
    start_byte: int,
    end_byte: int,
    part_num: int,
    pbar: "tqdm"
):
    """Download one byte range of *url* into a temporary ``part_<part_num>`` file.

    Issues a ranged GET (``bytes=start_byte-end_byte``) on *session*, streams
    the body to disk in 1 KiB chunks, and advances *pbar* by the number of
    bytes written.

    Raises:
        aiohttp.ClientResponseError: if the server answers with an error
            status (previously the error body was silently written to disk).
    """
    headers = {'Range': f'bytes={start_byte}-{end_byte}'}
    async with session.get(url, headers=headers) as response:
        # Fail fast on 4xx/5xx instead of corrupting the part file.
        response.raise_for_status()
        # Use aiofiles (as merge_files already does) so synchronous disk
        # writes do not block the event loop and stall the sibling tasks.
        async with aiofiles.open(f'part_{part_num}', 'wb') as f:
            while True:
                chunk = await response.content.read(1024)
                if not chunk:
                    break
                await f.write(chunk)
                pbar.update(len(chunk))
async def merge_files(filename, num_parts, pbar: tqdm):
    """Concatenate the ``part_0`` .. ``part_<num_parts-1>`` files into *filename*.

    Re-purposes *pbar* for the merge phase, then appends each part in order
    and deletes it once its bytes have been copied.
    """
    pbar.set_description("Merging")
    # Restart the bar at zero while keeping the same total for the merge pass.
    pbar.reset(total=pbar.total)
    async with aiofiles.open(filename, 'wb') as output:
        for index in range(num_parts):
            part_name = f'part_{index}'
            async with aiofiles.open(part_name, 'rb') as part:
                data = await part.read()
                await output.write(data)
                pbar.update(len(data))
            os.remove(part_name)
def get_filename(response: "aiohttp.ClientResponse"):
    """Derive a filename for the download from *response*.

    Resolution order:
    1. ``filename="..."`` in the Content-Disposition header.
    2. The last path segment of the (query-stripped) URL, if it already
       carries a file extension.
    3. That segment plus an extension guessed from the Content-Type header.

    The result is always percent-decoded with ``unquote``.
    Only ``response.headers`` and ``response.url`` are read.
    """
    headers = response.headers
    disposition = headers.get("content-disposition", "")
    if "filename" in disposition:
        match = re.match(r'.*filename=\"(.+)\".*', disposition)
        # Fall through to the URL heuristics if the quoted form is absent
        # (the original called .group(1) on a possible None).
        if match:
            return unquote(match.group(1))
    url = str(response.url).split("?")[0]
    filename = url.rstrip("/").split("/")[-1]
    if re.findall(r'\.[a-zA-Z]{2}\w{0,2}$', filename):
        return unquote(filename)
    # No extension in the URL: try to guess one from Content-Type.
    # Guard both the header lookup and the regex result -- the original
    # indexed findall(...)[0] on a possibly missing/unparsable header,
    # raising KeyError/IndexError.
    content_type = headers.get("Content-Type", "")
    matches = re.findall(r'([a-z]{4,11}/[\w+\-.]+)', content_type)
    if matches:
        guessed = extension(matches[0])
        if guessed:
            filename += guessed
    return unquote(filename)
async def download_file(url, num_parts=20):
    """Download *url* in up to *num_parts* parallel byte-range requests.

    Sends a HEAD request (following redirects) to learn the content length
    and pick a local filename, fans out one ranged GET per part, then merges
    the parts into the final file.

    Args:
        url: The resource to download. The server must report Content-Length
            and should honor Range requests.
        num_parts: Desired number of parallel parts (clamped to the file size).
    """
    async with aiohttp.ClientSession() as session:
        async with session.head(url, allow_redirects=True) as response:
            filename = get_filename(response)
            content_length = int(response.headers['Content-Length'])
        # Never use more parts than there are bytes: a zero chunk_size would
        # otherwise emit invalid Range headers such as 'bytes=0--1' for
        # files smaller than num_parts.
        num_parts = max(1, min(num_parts, content_length))
        chunk_size = content_length // num_parts
        with tqdm(total=content_length, unit="B", unit_scale=True,
                  desc="Downloading") as pbar:
            tasks = []
            for i in range(num_parts):
                start_byte = i * chunk_size
                if i == num_parts - 1:
                    # Last part absorbs the division remainder; Range ends
                    # are inclusive, so the final byte index is length - 1
                    # (the original overshot by one byte).
                    end_byte = content_length - 1
                else:
                    end_byte = start_byte + chunk_size - 1
                tasks.append(
                    fetch_chunk(session, url, start_byte, end_byte, i, pbar))
            await asyncio.gather(*tasks)
            await merge_files(filename, num_parts, pbar)