diff --git a/anymusicbot.py b/anymusicbot.py index ed777dd..52cea9f 100644 --- a/anymusicbot.py +++ b/anymusicbot.py @@ -1,4 +1,4 @@ from bot import main -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bot/__init__.py b/bot/__init__.py index 6cb621c..786db19 100644 --- a/bot/__init__.py +++ b/bot/__init__.py @@ -1,11 +1,11 @@ -from rich import print import contextlib +from rich import print + async def runner(): - from .common import dp, bot - - from . import handlers, callbacks + from . import callbacks, handlers + from .common import bot, dp from .modules.error import on_error dp.error.register(on_error) @@ -20,8 +20,8 @@ async def runner(): def plugins(): import nest_asyncio - from rich import traceback from icecream import ic + from rich import traceback nest_asyncio.apply() traceback.install() @@ -33,8 +33,8 @@ def main(): plugins() - print('Starting...') + print("Starting...") with contextlib.suppress(KeyboardInterrupt): asyncio.run(runner()) - print('[red]Stopped.[/]') + print("[red]Stopped.[/]") diff --git a/bot/__main__.py b/bot/__main__.py index c7c70d0..868d99e 100644 --- a/bot/__main__.py +++ b/bot/__main__.py @@ -1,4 +1,4 @@ from . import main -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bot/callbacks/__init__.py b/bot/callbacks/__init__.py index 6ad7d02..ab93328 100644 --- a/bot/callbacks/__init__.py +++ b/bot/callbacks/__init__.py @@ -1,11 +1,8 @@ from aiogram import Router -from . import ( - full_menu, - on_home, - settings, -) + from bot.middlewares import PrivateButtonMiddleware, SettingsInjectorMiddleware +from . import full_menu, on_home, settings router = Router() diff --git a/bot/callbacks/full_menu.py b/bot/callbacks/full_menu.py index fcacee9..5bdc444 100644 --- a/bot/callbacks/full_menu.py +++ b/bot/callbacks/full_menu.py @@ -1,19 +1,16 @@ -from aiogram import Router, F, Bot -from aiogram.types import ( - CallbackQuery -) +from aiogram import Bot, F, Router +from aiogram.types import CallbackQuery from bot.factories.full_menu import FullMenuCallback - from bot.keyboards.inline.settings import get_settings_kb router = Router() -@router.callback_query(FullMenuCallback.filter(F.action == 'settings')) +@router.callback_query(FullMenuCallback.filter(F.action == "settings")) async def on_settings(callback_query: CallbackQuery, bot: Bot): await bot.edit_message_text( inline_message_id=callback_query.inline_message_id, - text='⚙️ Settings', - reply_markup=get_settings_kb() + text="⚙️ Settings", + reply_markup=get_settings_kb(), ) diff --git a/bot/callbacks/on_home.py b/bot/callbacks/on_home.py index a97ae70..d46b514 100644 --- a/bot/callbacks/on_home.py +++ b/bot/callbacks/on_home.py @@ -1,19 +1,16 @@ -from aiogram import Router, F, Bot -from aiogram.types import ( - CallbackQuery -) +from aiogram import Bot, F, Router +from aiogram.types import CallbackQuery from bot.factories.full_menu import FullMenuCallback - from bot.keyboards.inline.full_menu import get_full_menu_kb router = Router() -@router.callback_query(FullMenuCallback.filter(F.action == 'home')) +@router.callback_query(FullMenuCallback.filter(F.action == "home")) async def on_home(callback_query: CallbackQuery, bot: Bot): await bot.edit_message_text( inline_message_id=callback_query.inline_message_id, - text='⚙️ Menu', - reply_markup=get_full_menu_kb() + text="⚙️ Menu", + reply_markup=get_full_menu_kb(), ) diff --git a/bot/callbacks/settings.py b/bot/callbacks/settings.py index f940d1a..5af529e 100644 --- a/bot/callbacks/settings.py +++ b/bot/callbacks/settings.py @@ -1,38 +1,30 @@ -from aiogram import Router, Bot -from aiogram.types import ( - CallbackQuery -) +from aiogram import Bot, Router from aiogram.exceptions import TelegramBadRequest +from aiogram.types import CallbackQuery from bot.factories.open_setting import OpenSettingCallback, SettingChoiceCallback - from bot.keyboards.inline.setting import get_setting_kb -from bot.modules.settings import settings_strings, UserSettings +from bot.modules.settings import UserSettings, settings_strings router = Router() @router.callback_query(OpenSettingCallback.filter()) async def on_settings( - callback_query: CallbackQuery, - callback_data: OpenSettingCallback, - bot: Bot + callback_query: CallbackQuery, callback_data: OpenSettingCallback, bot: Bot ): await bot.edit_message_text( inline_message_id=callback_query.inline_message_id, text=settings_strings[callback_data.s_id].description, reply_markup=get_setting_kb( - callback_data.s_id, - str(callback_query.from_user.id) - ) + callback_data.s_id, str(callback_query.from_user.id) + ), ) @router.callback_query(SettingChoiceCallback.filter()) async def on_change_setting( - callback_query: CallbackQuery, - callback_data: SettingChoiceCallback, - bot: Bot + callback_query: CallbackQuery, callback_data: SettingChoiceCallback, bot: Bot ): UserSettings(callback_query.from_user.id)[callback_data.s_id] = callback_data.choice try: @@ -40,9 +32,8 @@ async def on_change_setting( inline_message_id=callback_query.inline_message_id, text=settings_strings[callback_data.s_id].description, reply_markup=get_setting_kb( - callback_data.s_id, - str(callback_query.from_user.id) - ) + callback_data.s_id, str(callback_query.from_user.id) + ), ) except TelegramBadRequest: pass diff --git a/bot/common.py b/bot/common.py index 572f0a2..3158a9e 100644 --- a/bot/common.py +++ b/bot/common.py @@ -1,6 +1,8 @@ from aiogram import Bot, Dispatcher -from bot.modules.fsm import InDbStorage from rich.console import Console + +from bot.modules.fsm import InDbStorage + from .utils.config import config bot = Bot(token=config.telegram.bot_token) @@ -8,4 +10,4 @@ dp = Dispatcher(storage=InDbStorage()) console = Console() -__all__ = ['bot', 'dp', 'config', 'console'] +__all__ = ["bot", "dp", "config", "console"] diff --git a/bot/factories/full_menu.py b/bot/factories/full_menu.py index ecb577f..50e6b50 100644 --- a/bot/factories/full_menu.py +++ b/bot/factories/full_menu.py @@ -1,6 +1,7 @@ from typing import Literal + from aiogram.filters.callback_data import CallbackData -class FullMenuCallback(CallbackData, prefix='full_menu'): - action: Literal['home', 'settings'] +class FullMenuCallback(CallbackData, prefix="full_menu"): + action: Literal["home", "settings"] diff --git a/bot/factories/open_setting.py b/bot/factories/open_setting.py index 9793c93..3a2fa7b 100644 --- a/bot/factories/open_setting.py +++ b/bot/factories/open_setting.py @@ -1,10 +1,10 @@ from aiogram.filters.callback_data import CallbackData -class OpenSettingCallback(CallbackData, prefix='setting'): +class OpenSettingCallback(CallbackData, prefix="setting"): s_id: str -class SettingChoiceCallback(CallbackData, prefix='s_choice'): +class SettingChoiceCallback(CallbackData, prefix="s_choice"): s_id: str choice: str diff --git a/bot/filters/search.py b/bot/filters/search.py index 86d18fb..5405d04 100644 --- a/bot/filters/search.py +++ b/bot/filters/search.py @@ -4,22 +4,21 @@ from aiogram.types import InlineQuery class ServiceSearchFilter(BaseFilter): def __init__(self, service_letter: str): - self.service_letter = f'{service_letter}:' + self.service_letter = f"{service_letter}:" async def __call__(self, inline_query: InlineQuery): return ( - inline_query.query.startswith(self.service_letter) and - inline_query.query != self.service_letter + inline_query.query.startswith(self.service_letter) + and inline_query.query != self.service_letter ) class ServiceSearchMultiletterFilter(BaseFilter): def __init__(self, service_lettes: list[str]): - self.service_letter = [f'{letter}:' for letter in service_lettes] + self.service_letter = [f"{letter}:" for letter in service_lettes] async def __call__(self, inline_query: InlineQuery): return ( - any(inline_query.query.startswith(letter) for letter in - self.service_letter) and - inline_query.query not in self.service_letter + any(inline_query.query.startswith(letter) for letter in self.service_letter) + and inline_query.query not in self.service_letter ) diff --git a/bot/filters/url.py b/bot/filters/url.py index d2c2755..fc6cc46 100644 --- a/bot/filters/url.py +++ b/bot/filters/url.py @@ -1,32 +1,29 @@ +from urllib.parse import urlparse + from aiogram.filters import BaseFilter from aiogram.types import InlineQuery -from urllib.parse import urlparse - class MusicUrlFilter(BaseFilter): def __init__(self): pass async def __call__(self, inline_query: InlineQuery): - if not inline_query.query.strip().startswith('http'): + if not inline_query.query.strip().startswith("http"): return False url = urlparse(inline_query.query) - return ( - url.scheme in ['http', 'https'] and - any( - map( - url.netloc.endswith, - [ - 'youtube.com', - 'youtu.be', - 'open.spotify.com', - 'spotify.link', - 'deezer.page.link', - 'deezer.com', - 'soundcloud.com' - ] - ) - ) + return url.scheme in ["http", "https"] and any( + map( + url.netloc.endswith, + [ + "youtube.com", + "youtu.be", + "open.spotify.com", + "spotify.link", + "deezer.page.link", + "deezer.com", + "soundcloud.com", + ], + ) ) diff --git a/bot/handlers/__init__.py b/bot/handlers/__init__.py index 3716dd2..42db1b2 100644 --- a/bot/handlers/__init__.py +++ b/bot/handlers/__init__.py @@ -1,16 +1,17 @@ from aiogram import Router -from . import ( - initialize, - inline_song, - inline_url, - inline_error, - inline_default, - inline_empty, - on_chosen, -) from bot.middlewares import SaveChosenMiddleware, SettingsInjectorMiddleware +from . import ( + initialize, + inline_default, + inline_empty, + inline_error, + inline_song, + inline_url, + on_chosen, +) + router = Router() router.chosen_inline_result.outer_middleware(SaveChosenMiddleware()) diff --git a/bot/handlers/initialize/initializer.py b/bot/handlers/initialize/initializer.py index 629ce9e..354ce0e 100644 --- a/bot/handlers/initialize/initializer.py +++ b/bot/handlers/initialize/initializer.py @@ -1,10 +1,9 @@ -from aiogram import Router, Bot +from aiogram import Bot, Router from rich import print - router = Router() @router.startup() async def startup(bot: Bot): - print(f'[green]Started as[/] @{(await bot.me()).username}') + print(f"[green]Started as[/] @{(await bot.me()).username}") diff --git a/bot/handlers/inline_default/on_inline_default.py b/bot/handlers/inline_default/on_inline_default.py index 924d286..65f6b90 100644 --- a/bot/handlers/inline_default/on_inline_default.py +++ b/bot/handlers/inline_default/on_inline_default.py @@ -1,26 +1,24 @@ -from aiogram import Router, F - +from aiogram import F, Router from aiogram.types import InlineQuery +from bot.modules.settings import UserSettings from bot.results.deezer import get_deezer_search_results from bot.results.soundcloud import get_soundcloud_search_results -from bot.results.youtube import get_youtube_search_results from bot.results.spotify import get_spotify_search_results - -from bot.modules.settings import UserSettings +from bot.results.youtube import get_youtube_search_results router = Router() -@router.inline_query(F.query != '') +@router.inline_query(F.query != "") async def default_inline_query(inline_query: InlineQuery, settings: UserSettings): await inline_query.answer( await { - 'd': get_deezer_search_results, - 'c': get_soundcloud_search_results, - 'y': get_youtube_search_results, - 's': get_spotify_search_results - }[settings['default_search_provider'].value](inline_query.query, settings), + "d": get_deezer_search_results, + "c": get_soundcloud_search_results, + "y": get_youtube_search_results, + "s": get_spotify_search_results, + }[settings["default_search_provider"].value](inline_query.query, settings), cache_time=0, - is_personal=True + is_personal=True, ) diff --git a/bot/handlers/inline_empty/on_inline_empty.py b/bot/handlers/inline_empty/on_inline_empty.py index f745e2f..12a811d 100644 --- a/bot/handlers/inline_empty/on_inline_empty.py +++ b/bot/handlers/inline_empty/on_inline_empty.py @@ -1,24 +1,21 @@ -from aiogram import Router, F -from aiogram.types import ( - InlineQuery, InputTextMessageContent, InlineQueryResultArticle -) +from aiogram import F, Router +from aiogram.types import InlineQuery, InlineQueryResultArticle, InputTextMessageContent from bot.keyboards.inline.full_menu import get_full_menu_kb router = Router() -@router.inline_query(F.query == '') +@router.inline_query(F.query == "") async def empty_inline_query(inline_query: InlineQuery): await inline_query.answer( [ InlineQueryResultArticle( - id='show_menu', - title='⚙️ Open menu', - input_message_content=InputTextMessageContent( - message_text='⚙️ Menu' - ), - reply_markup=get_full_menu_kb() + id="show_menu", + title="⚙️ Open menu", + input_message_content=InputTextMessageContent(message_text="⚙️ Menu"), + reply_markup=get_full_menu_kb(), ) - ], cache_time=0 + ], + cache_time=0, ) diff --git a/bot/handlers/inline_error/on_inline_error_info.py b/bot/handlers/inline_error/on_inline_error_info.py index 2626e4f..b837095 100644 --- a/bot/handlers/inline_error/on_inline_error_info.py +++ b/bot/handlers/inline_error/on_inline_error_info.py @@ -1,17 +1,16 @@ from aiogram import Router - from aiogram.types import InlineQuery -from bot.results.error import get_error_search_results from bot.filters import ServiceSearchFilter +from bot.results.error import get_error_search_results router = Router() -@router.inline_query(ServiceSearchFilter('error')) +@router.inline_query(ServiceSearchFilter("error")) async def search_spotify_inline_query(inline_query: InlineQuery): await inline_query.answer( - await get_error_search_results(inline_query.query.removeprefix('error:')), + await get_error_search_results(inline_query.query.removeprefix("error:")), cache_time=0, - is_personal=True + is_personal=True, ) diff --git a/bot/handlers/inline_song/__init__.py b/bot/handlers/inline_song/__init__.py index 7fddc2e..c336932 100644 --- a/bot/handlers/inline_song/__init__.py +++ b/bot/handlers/inline_song/__init__.py @@ -1,12 +1,16 @@ from aiogram import Router -from . import (on_inline_spotify, on_inline_deezer, on_inline_youtube, - on_inline_soundcloud) +from . import ( + on_inline_deezer, + on_inline_soundcloud, + on_inline_spotify, + on_inline_youtube, +) router = Router() router.include_routers( on_inline_spotify.router, on_inline_deezer.router, on_inline_youtube.router, - on_inline_soundcloud.router + on_inline_soundcloud.router, ) diff --git a/bot/handlers/inline_song/on_inline_deezer.py b/bot/handlers/inline_song/on_inline_deezer.py index 12a9298..d77b065 100644 --- a/bot/handlers/inline_song/on_inline_deezer.py +++ b/bot/handlers/inline_song/on_inline_deezer.py @@ -1,21 +1,19 @@ from aiogram import Router - from aiogram.types import InlineQuery -from bot.results.deezer import get_deezer_search_results from bot.filters import ServiceSearchFilter from bot.modules.settings import UserSettings +from bot.results.deezer import get_deezer_search_results router = Router() -@router.inline_query(ServiceSearchFilter('d')) +@router.inline_query(ServiceSearchFilter("d")) async def search_deezer_inline_query(inline_query: InlineQuery, settings: UserSettings): await inline_query.answer( await get_deezer_search_results( - inline_query.query.removeprefix('d:'), - settings + inline_query.query.removeprefix("d:"), settings ), cache_time=0, - is_personal=True + is_personal=True, ) diff --git a/bot/handlers/inline_song/on_inline_soundcloud.py b/bot/handlers/inline_song/on_inline_soundcloud.py index 2618cc2..24ae286 100644 --- a/bot/handlers/inline_song/on_inline_soundcloud.py +++ b/bot/handlers/inline_song/on_inline_soundcloud.py @@ -1,24 +1,21 @@ from aiogram import Router - from aiogram.types import InlineQuery -from bot.results.soundcloud import get_soundcloud_search_results from bot.filters import ServiceSearchMultiletterFilter from bot.modules.settings import UserSettings +from bot.results.soundcloud import get_soundcloud_search_results router = Router() -@router.inline_query(ServiceSearchMultiletterFilter(['c', 'с'])) +@router.inline_query(ServiceSearchMultiletterFilter(["c", "с"])) async def search_soundcloud_inline_query( - inline_query: InlineQuery, - settings: UserSettings + inline_query: InlineQuery, settings: UserSettings ): await inline_query.answer( await get_soundcloud_search_results( - inline_query.query.removeprefix('c:').removesuffix('с:'), - settings + inline_query.query.removeprefix("c:").removesuffix("с:"), settings ), cache_time=0, - is_personal=True + is_personal=True, ) diff --git a/bot/handlers/inline_song/on_inline_spotify.py b/bot/handlers/inline_song/on_inline_spotify.py index 1d54e2f..f88f6c3 100644 --- a/bot/handlers/inline_song/on_inline_spotify.py +++ b/bot/handlers/inline_song/on_inline_spotify.py @@ -1,22 +1,21 @@ from aiogram import Router - from aiogram.types import InlineQuery -from bot.results.spotify import get_spotify_search_results from bot.filters import ServiceSearchFilter from bot.modules.settings import UserSettings +from bot.results.spotify import get_spotify_search_results router = Router() -@router.inline_query(ServiceSearchFilter('s')) +@router.inline_query(ServiceSearchFilter("s")) async def search_spotify_inline_query( - inline_query: InlineQuery, - settings: UserSettings + inline_query: InlineQuery, settings: UserSettings ): await inline_query.answer( - await get_spotify_search_results(inline_query.query.removeprefix('s:'), - settings), + await get_spotify_search_results( + inline_query.query.removeprefix("s:"), settings + ), cache_time=0, - is_personal=True + is_personal=True, ) diff --git a/bot/handlers/inline_song/on_inline_youtube.py b/bot/handlers/inline_song/on_inline_youtube.py index 6271663..297598f 100644 --- a/bot/handlers/inline_song/on_inline_youtube.py +++ b/bot/handlers/inline_song/on_inline_youtube.py @@ -1,20 +1,21 @@ from aiogram import Router - from aiogram.types import InlineQuery -from bot.results.youtube import get_youtube_search_results from bot.filters import ServiceSearchFilter from bot.modules.settings import UserSettings +from bot.results.youtube import get_youtube_search_results router = Router() -@router.inline_query(ServiceSearchFilter('y')) -async def search_youtube_inline_query(inline_query: InlineQuery, - settings: UserSettings): +@router.inline_query(ServiceSearchFilter("y")) +async def search_youtube_inline_query( + inline_query: InlineQuery, settings: UserSettings +): await inline_query.answer( - await get_youtube_search_results(inline_query.query.removeprefix('y:'), - settings), + await get_youtube_search_results( + inline_query.query.removeprefix("y:"), settings + ), cache_time=0, - is_personal=True + is_personal=True, ) diff --git a/bot/handlers/inline_url/on_inline_url.py b/bot/handlers/inline_url/on_inline_url.py index 4707347..d872770 100644 --- a/bot/handlers/inline_url/on_inline_url.py +++ b/bot/handlers/inline_url/on_inline_url.py @@ -1,10 +1,9 @@ from aiogram import Router - from aiogram.types import InlineQuery -from bot.results.url import get_url_results from bot.filters import MusicUrlFilter from bot.modules.settings import UserSettings +from bot.results.url import get_url_results router = Router() @@ -14,5 +13,5 @@ async def url_deezer_inline_query(inline_query: InlineQuery, settings: UserSetti await inline_query.answer( await get_url_results(inline_query.query, settings), cache_time=0, - is_personal=True + is_personal=True, ) diff --git a/bot/handlers/on_chosen/__init__.py b/bot/handlers/on_chosen/__init__.py index 4d1ec12..d21b3ea 100644 --- a/bot/handlers/on_chosen/__init__.py +++ b/bot/handlers/on_chosen/__init__.py @@ -1,5 +1,6 @@ from aiogram import Router -from . import spotify, deezer, youtube, soundcloud, recode_cached, suppress_verify + +from . import deezer, recode_cached, soundcloud, spotify, suppress_verify, youtube router = Router() @@ -12,4 +13,4 @@ router.include_routers( suppress_verify.router, ) -__all__ = ['router'] +__all__ = ["router"] diff --git a/bot/handlers/on_chosen/deezer.py b/bot/handlers/on_chosen/deezer.py index a31821a..c1b53ec 100644 --- a/bot/handlers/on_chosen/deezer.py +++ b/bot/handlers/on_chosen/deezer.py @@ -1,21 +1,23 @@ -from aiogram import Router, Bot, F +from aiogram import Bot, F, Router from aiogram.types import ( - BufferedInputFile, URLInputFile, InputMediaAudio, + BufferedInputFile, ChosenInlineResult, + InputMediaAudio, + URLInputFile, ) -from bot.modules.deezer import deezer, DeezerBytestream -from bot.utils.config import config from bot.modules.database import db +from bot.modules.deezer import DeezerBytestream, deezer +from bot.utils.config import config router = Router() -@router.chosen_inline_result(F.result_id.startswith('deez::')) +@router.chosen_inline_result(F.result_id.startswith("deez::")) async def on_new_chosen(chosen_result: ChosenInlineResult, bot: Bot): - bytestream: DeezerBytestream = await (await deezer.downloader.from_id( - chosen_result.result_id.removeprefix('deez::') - )).to_bytestream() + bytestream: DeezerBytestream = await ( + await deezer.downloader.from_id(chosen_result.result_id.removeprefix("deez::")) + ).to_bytestream() audio = await bot.send_audio( chat_id=config.telegram.files_chat, @@ -34,5 +36,5 @@ async def on_new_chosen(chosen_result: ChosenInlineResult, bot: Bot): await bot.edit_message_media( inline_message_id=chosen_result.inline_message_id, media=InputMediaAudio(media=audio.audio.file_id), - reply_markup=None + reply_markup=None, ) diff --git a/bot/handlers/on_chosen/recode_cached.py b/bot/handlers/on_chosen/recode_cached.py index d693a2e..f69fd95 100644 --- a/bot/handlers/on_chosen/recode_cached.py +++ b/bot/handlers/on_chosen/recode_cached.py @@ -1,57 +1,48 @@ -from aiogram import Router, Bot, F -from aiogram.types import ( - BufferedInputFile, InputMediaAudio, - ChosenInlineResult, -) +from io import BytesIO -from bot.modules.youtube.downloader import YouTubeBytestream +from aiogram import Bot, F, Router +from aiogram.types import BufferedInputFile, ChosenInlineResult, InputMediaAudio -from bot.utils.config import config from bot.modules.database import db from bot.modules.settings import UserSettings - -from io import BytesIO +from bot.modules.youtube.downloader import YouTubeBytestream +from bot.utils.config import config router = Router() @router.chosen_inline_result( - F.result_id.startswith('spotc::') | F.result_id.startswith('ytc::') + F.result_id.startswith("spotc::") | F.result_id.startswith("ytc::") ) -async def on_cached_chosen(chosen_result: ChosenInlineResult, bot: Bot, - settings: UserSettings): - if settings['recode_youtube'].value != 'yes': +async def on_cached_chosen( + chosen_result: ChosenInlineResult, bot: Bot, settings: UserSettings +): + if settings["recode_youtube"].value != "yes": await bot.edit_message_reply_markup( - inline_message_id=chosen_result.inline_message_id, - reply_markup=None + inline_message_id=chosen_result.inline_message_id, reply_markup=None ) return - if ( - type( - db.recoded.get( - song_id := chosen_result.result_id - .removeprefix('spotc::') - .removeprefix('ytc::') + if type( + db.recoded.get( + song_id := chosen_result.result_id.removeprefix("spotc::").removeprefix( + "ytc::" ) - ) in [bool, type(None)] - ): + ) + ) in [bool, type(None)]: await bot.edit_message_reply_markup( - inline_message_id=chosen_result.inline_message_id, - reply_markup=None + inline_message_id=chosen_result.inline_message_id, reply_markup=None ) return await bot.edit_message_caption( inline_message_id=chosen_result.inline_message_id, - caption='🔄 Recoding...', - reply_markup=None + caption="🔄 Recoding...", + reply_markup=None, ) message = await bot.forward_message( - config.telegram.files_chat, - config.telegram.files_chat, - db.recoded[song_id] + config.telegram.files_chat, config.telegram.files_chat, db.recoded[song_id] ) song_io: BytesIO = await bot.download( # type: ignore @@ -76,7 +67,7 @@ async def on_cached_chosen(chosen_result: ChosenInlineResult, bot: Bot, ), thumbnail=BufferedInputFile( file=(await bot.download(message.audio.thumbnail.file_id)).read(), - filename='thumbnail.jpg' + filename="thumbnail.jpg", ), performer=message.audio.performer, title=message.audio.title, @@ -85,15 +76,15 @@ async def on_cached_chosen(chosen_result: ChosenInlineResult, bot: Bot, await bot.edit_message_caption( inline_message_id=chosen_result.inline_message_id, - caption='', + caption="", reply_markup=None, ) await bot.edit_message_media( inline_message_id=chosen_result.inline_message_id, - media=InputMediaAudio(media=audio.audio.file_id) + media=InputMediaAudio(media=audio.audio.file_id), ) - if chosen_result.result_id.startswith('spotc::'): + if chosen_result.result_id.startswith("spotc::"): db.spotify[song_id] = audio.audio.file_id else: db.youtube[song_id] = audio.audio.file_id diff --git a/bot/handlers/on_chosen/soundcloud.py b/bot/handlers/on_chosen/soundcloud.py index fc66b68..3638793 100644 --- a/bot/handlers/on_chosen/soundcloud.py +++ b/bot/handlers/on_chosen/soundcloud.py @@ -1,21 +1,25 @@ -from aiogram import Router, Bot, F +from aiogram import Bot, F, Router from aiogram.types import ( - BufferedInputFile, URLInputFile, InputMediaAudio, + BufferedInputFile, ChosenInlineResult, + InputMediaAudio, + URLInputFile, ) -from bot.modules.soundcloud import soundcloud, SoundCloudBytestream -from bot.utils.config import config from bot.modules.database import db +from bot.modules.soundcloud import SoundCloudBytestream, soundcloud +from bot.utils.config import config router = Router() -@router.chosen_inline_result(F.result_id.startswith('sc::')) +@router.chosen_inline_result(F.result_id.startswith("sc::")) async def on_new_chosen(chosen_result: ChosenInlineResult, bot: Bot): - bytestream: SoundCloudBytestream = await (await soundcloud.downloader.from_id( - chosen_result.result_id.removeprefix('sc::') - )).to_bytestream() + bytestream: SoundCloudBytestream = await ( + await soundcloud.downloader.from_id( + chosen_result.result_id.removeprefix("sc::") + ) + ).to_bytestream() audio = await bot.send_audio( chat_id=config.telegram.files_chat, @@ -33,5 +37,5 @@ async def on_new_chosen(chosen_result: ChosenInlineResult, bot: Bot): await bot.edit_message_media( inline_message_id=chosen_result.inline_message_id, media=InputMediaAudio(media=audio.audio.file_id), - reply_markup=None + reply_markup=None, ) diff --git a/bot/handlers/on_chosen/spotify.py b/bot/handlers/on_chosen/spotify.py index 1155579..6253c30 100644 --- a/bot/handlers/on_chosen/spotify.py +++ b/bot/handlers/on_chosen/spotify.py @@ -1,31 +1,34 @@ -from aiogram import Router, Bot, F +from aiogram import Bot, F, Router from aiogram.types import ( - BufferedInputFile, URLInputFile, InputMediaAudio, + BufferedInputFile, ChosenInlineResult, + InputMediaAudio, + URLInputFile, ) -from bot.modules.spotify import spotify -from bot.modules.youtube import youtube, AgeRestrictedError -from bot.modules.youtube.song import SongItem -from bot.modules.deezer import deezer -from bot.utils.config import config from bot.modules.database import db +from bot.modules.deezer import deezer from bot.modules.settings import UserSettings +from bot.modules.spotify import spotify +from bot.modules.youtube import AgeRestrictedError, youtube +from bot.modules.youtube.song import SongItem +from bot.utils.config import config router = Router() def not_strict_name(song, yt_song): - if 'feat' in yt_song.name.lower(): + if "feat" in yt_song.name.lower(): return any(artist.lower() in yt_song.name.lower() for artist in song.artists) else: return False -@router.chosen_inline_result(F.result_id.startswith('spot::')) -async def on_new_chosen(chosen_result: ChosenInlineResult, bot: Bot, - settings: UserSettings): - song = spotify.songs.from_id(chosen_result.result_id.removeprefix('spot::')) +@router.chosen_inline_result(F.result_id.startswith("spot::")) +async def on_new_chosen( + chosen_result: ChosenInlineResult, bot: Bot, settings: UserSettings +): + song = spotify.songs.from_id(chosen_result.result_id.removeprefix("spot::")) bytestream = None audio = None @@ -34,14 +37,15 @@ async def on_new_chosen(chosen_result: ChosenInlineResult, bot: Bot, song.full_name, exact_match=True, ) - if settings['exact_spotify_search'].value == 'yes': - if ((song.all_artists != yt_song.all_artists or song.name != yt_song.name) - and not not_strict_name(song, yt_song)): + if settings["exact_spotify_search"].value == "yes": + if ( + song.all_artists != yt_song.all_artists or song.name != yt_song.name + ) and not not_strict_name(song, yt_song): await bot.edit_message_caption( inline_message_id=chosen_result.inline_message_id, - caption='🙄 Cannot find this song on YouTube, trying Deezer...', + caption="🙄 Cannot find this song on YouTube, trying Deezer...", reply_markup=None, - parse_mode='HTML', + parse_mode="HTML", ) yt_song = None bytestream = False @@ -66,9 +70,9 @@ async def on_new_chosen(chosen_result: ChosenInlineResult, bot: Bot, except AgeRestrictedError: await bot.edit_message_caption( inline_message_id=chosen_result.inline_message_id, - caption='🔞 This song is age restricted, trying Deezer...', + caption="🔞 This song is age restricted, trying Deezer...", reply_markup=None, - parse_mode='HTML', + parse_mode="HTML", ) yt_song = None @@ -99,29 +103,29 @@ async def on_new_chosen(chosen_result: ChosenInlineResult, bot: Bot, assert e if audio: - if settings['exact_spotify_search'].value == 'yes': + if settings["exact_spotify_search"].value == "yes": db.spotify[song.id] = audio.audio.file_id await bot.edit_message_media( inline_message_id=chosen_result.inline_message_id, media=InputMediaAudio(media=audio.audio.file_id), - reply_markup=None + reply_markup=None, ) else: await bot.edit_message_caption( inline_message_id=chosen_result.inline_message_id, - caption='🤷‍♂️ Cannot download this song', + caption="🤷‍♂️ Cannot download this song", reply_markup=None, - parse_mode='HTML', + parse_mode="HTML", ) - if yt_song and settings['recode_youtube'].value == 'yes': + if yt_song and settings["recode_youtube"].value == "yes": await bot.edit_message_caption( inline_message_id=chosen_result.inline_message_id, - caption='🔄 Recoding...', + caption="🔄 Recoding...", reply_markup=None, - parse_mode='HTML', + parse_mode="HTML", ) await bytestream.rerender() @@ -139,20 +143,20 @@ async def on_new_chosen(chosen_result: ChosenInlineResult, bot: Bot, db.youtube[yt_song.id] = audio.audio.file_id db.recoded[yt_song.id] = True - if settings['exact_spotify_search'].value == 'yes': + if settings["exact_spotify_search"].value == "yes": db.spotify[song.id] = audio.audio.file_id db.recoded[song.id] = True await bot.edit_message_caption( inline_message_id=chosen_result.inline_message_id, - caption='', + caption="", reply_markup=None, ) await bot.edit_message_media( inline_message_id=chosen_result.inline_message_id, - media=InputMediaAudio(media=audio.audio.file_id) + media=InputMediaAudio(media=audio.audio.file_id), ) - elif yt_song and settings['recode_youtube'].value == 'no': + elif yt_song and settings["recode_youtube"].value == "no": db.recoded[yt_song.id] = audio.message_id - if settings['exact_spotify_search'].value == 'yes': + if settings["exact_spotify_search"].value == "yes": db.recoded[song.id] = audio.message_id diff --git a/bot/handlers/on_chosen/suppress_verify.py b/bot/handlers/on_chosen/suppress_verify.py index b073544..402f321 100644 --- a/bot/handlers/on_chosen/suppress_verify.py +++ b/bot/handlers/on_chosen/suppress_verify.py @@ -1,16 +1,13 @@ -from aiogram import Router, Bot, F -from aiogram.types import ( - ChosenInlineResult, -) +from aiogram import Bot, F, Router +from aiogram.types import ChosenInlineResult router = Router() @router.chosen_inline_result( - F.result_id.startswith('deezc::') | F.result_id.startswith('scc::') + F.result_id.startswith("deezc::") | F.result_id.startswith("scc::") ) async def on_unneeded_cached_chosen(chosen_result: ChosenInlineResult, bot: Bot): await bot.edit_message_reply_markup( - inline_message_id=chosen_result.inline_message_id, - reply_markup=None + inline_message_id=chosen_result.inline_message_id, reply_markup=None ) diff --git a/bot/handlers/on_chosen/youtube.py b/bot/handlers/on_chosen/youtube.py index 444bdaa..75bfcb5 100644 --- a/bot/handlers/on_chosen/youtube.py +++ b/bot/handlers/on_chosen/youtube.py @@ -1,30 +1,33 @@ -from aiogram import Router, Bot, F +from aiogram import Bot, F, Router from aiogram.types import ( - BufferedInputFile, URLInputFile, InputMediaAudio, + BufferedInputFile, ChosenInlineResult, + InputMediaAudio, + URLInputFile, ) -from bot.modules.youtube import youtube, AgeRestrictedError -from bot.utils.config import config from bot.modules.database import db from bot.modules.settings import UserSettings +from bot.modules.youtube import AgeRestrictedError, youtube +from bot.utils.config import config router = Router() -@router.chosen_inline_result(F.result_id.startswith('yt::')) -async def on_new_chosen(chosen_result: ChosenInlineResult, bot: Bot, - settings: UserSettings): - song = youtube.songs.from_id(chosen_result.result_id.removeprefix('yt::')) +@router.chosen_inline_result(F.result_id.startswith("yt::")) +async def on_new_chosen( + chosen_result: ChosenInlineResult, bot: Bot, settings: UserSettings +): + song = youtube.songs.from_id(chosen_result.result_id.removeprefix("yt::")) try: bytestream = await song.to_bytestream() except AgeRestrictedError: await bot.edit_message_caption( inline_message_id=chosen_result.inline_message_id, - caption='🔞 This song is age restricted, so I can\'t download it. ' - 'Try downloading it from Deezer or SoundCloud', - reply_markup=None + caption="🔞 This song is age restricted, so I can't download it. " + "Try downloading it from Deezer or SoundCloud", + reply_markup=None, ) return @@ -45,14 +48,14 @@ async def on_new_chosen(chosen_result: ChosenInlineResult, bot: Bot, await bot.edit_message_media( inline_message_id=chosen_result.inline_message_id, media=InputMediaAudio(media=audio.audio.file_id), - reply_markup=None + reply_markup=None, ) - if settings['recode_youtube'].value == 'yes': + if settings["recode_youtube"].value == "yes": await bot.edit_message_caption( inline_message_id=chosen_result.inline_message_id, - caption='🔄 Recoding...', - reply_markup=None + caption="🔄 Recoding...", + reply_markup=None, ) await bytestream.rerender() @@ -75,7 +78,7 @@ async def on_new_chosen(chosen_result: ChosenInlineResult, bot: Bot, await bot.edit_message_media( inline_message_id=chosen_result.inline_message_id, media=InputMediaAudio(media=audio.audio.file_id), - reply_markup=None + reply_markup=None, ) else: db.recoded[song.id] = audio.message_id diff --git a/bot/keyboards/inline/full_menu.py b/bot/keyboards/inline/full_menu.py index 639c268..8a1c84b 100644 --- a/bot/keyboards/inline/full_menu.py +++ b/bot/keyboards/inline/full_menu.py @@ -1,27 +1,23 @@ -from aiogram.utils.keyboard import (InlineKeyboardMarkup, InlineKeyboardButton, - InlineKeyboardBuilder) -from bot.factories.full_menu import FullMenuCallback +from aiogram.utils.keyboard import ( + InlineKeyboardBuilder, + InlineKeyboardButton, + InlineKeyboardMarkup, +) +from bot.factories.full_menu import FullMenuCallback from bot.keyboards.inline import search_variants as sv def get_full_menu_kb() -> InlineKeyboardMarkup: - buttons = (sv.get_search_variants( - query='', - services= - sv.soundcloud | - sv.spotify | - sv.deezer | - sv.youtube + buttons = sv.get_search_variants( + query="", services=sv.soundcloud | sv.spotify | sv.deezer | sv.youtube ) + [ - [ - InlineKeyboardButton( - text='⚙️ Settings', - callback_data=FullMenuCallback( - action='settings' - ).pack() - ) - ], - ]) + [ + InlineKeyboardButton( + text="⚙️ Settings", + callback_data=FullMenuCallback(action="settings").pack(), + ) + ], + ] return InlineKeyboardBuilder(buttons).as_markup() diff --git a/bot/keyboards/inline/search_variants.py b/bot/keyboards/inline/search_variants.py index 5170348..111ec56 100644 --- a/bot/keyboards/inline/search_variants.py +++ b/bot/keyboards/inline/search_variants.py @@ -1,42 +1,33 @@ -from aiogram.utils.keyboard import (InlineKeyboardMarkup, InlineKeyboardButton, - InlineKeyboardBuilder) +from aiogram.utils.keyboard import ( + InlineKeyboardBuilder, + InlineKeyboardButton, + InlineKeyboardMarkup, +) - -deezer = { - 'd': '🎵 Search in Deezer' -} -soundcloud = { - 'c': '☁️ Search in SoundCloud' -} -youtube = { - 'y': '▶️ Search in YouTube' -} -spotify = { - 's': '🎧 Search in Spotify' -} +deezer = {"d": "🎵 Search in Deezer"} +soundcloud = {"c": "☁️ Search in SoundCloud"} +youtube = {"y": "▶️ Search in YouTube"} +spotify = {"s": "🎧 Search in Spotify"} def get_search_variants( - query: str, - services: dict[str, str], + query: str, + services: dict[str, str], ) -> list[list[InlineKeyboardButton]]: buttons = [ [ InlineKeyboardButton( - text=services[key], - switch_inline_query_current_chat=f'{key}:{query}' + text=services[key], switch_inline_query_current_chat=f"{key}:{query}" ) - ] for key in services.keys() + ] + for key in services.keys() ] return buttons def get_search_variants_kb( - query: str, - services: dict[str, str], + query: str, + services: dict[str, str], ) -> InlineKeyboardMarkup: - return InlineKeyboardBuilder(get_search_variants( - query, - services - )).as_markup() + return InlineKeyboardBuilder(get_search_variants(query, services)).as_markup() diff --git a/bot/keyboards/inline/setting.py b/bot/keyboards/inline/setting.py index fc5a729..3e845e1 100644 --- a/bot/keyboards/inline/setting.py +++ b/bot/keyboards/inline/setting.py @@ -1,8 +1,11 @@ -from aiogram.utils.keyboard import (InlineKeyboardMarkup, InlineKeyboardButton, - InlineKeyboardBuilder) -from bot.factories.open_setting import SettingChoiceCallback -from bot.factories.full_menu import FullMenuCallback +from aiogram.utils.keyboard import ( + InlineKeyboardBuilder, + InlineKeyboardButton, + InlineKeyboardMarkup, +) +from bot.factories.full_menu import FullMenuCallback +from bot.factories.open_setting import SettingChoiceCallback from bot.modules.settings import UserSettings @@ -11,22 +14,21 @@ def get_setting_kb(s_id: str, user_id: str) -> InlineKeyboardMarkup: buttons = [ [ InlineKeyboardButton( - text=( - '✅ ' if setting.value == choice else '' - ) + setting.choices[choice], + text=("✅ " if setting.value == choice else "") + + setting.choices[choice], callback_data=SettingChoiceCallback( s_id=s_id, choice=choice, - ).pack() + ).pack(), ) - ] for choice in setting.choices.keys() - ] + [[ - InlineKeyboardButton( - text='🔙', - callback_data=FullMenuCallback( - action='settings' - ).pack() - ) - ]] + ] + for choice in setting.choices.keys() + ] + [ + [ + InlineKeyboardButton( + text="🔙", callback_data=FullMenuCallback(action="settings").pack() + ) + ] + ] return InlineKeyboardBuilder(buttons).as_markup() diff --git a/bot/keyboards/inline/settings.py b/bot/keyboards/inline/settings.py index d643984..0158550 100644 --- a/bot/keyboards/inline/settings.py +++ b/bot/keyboards/inline/settings.py @@ -1,8 +1,11 @@ -from aiogram.utils.keyboard import (InlineKeyboardMarkup, InlineKeyboardButton, - InlineKeyboardBuilder) -from bot.factories.open_setting import OpenSettingCallback -from bot.factories.full_menu import FullMenuCallback +from aiogram.utils.keyboard import ( + InlineKeyboardBuilder, + InlineKeyboardButton, + InlineKeyboardMarkup, +) +from bot.factories.full_menu import FullMenuCallback +from bot.factories.open_setting import OpenSettingCallback from bot.modules.settings import settings_strings @@ -13,16 +16,16 @@ def get_settings_kb() -> InlineKeyboardMarkup: text=settings_strings[setting_id].name, callback_data=OpenSettingCallback( s_id=setting_id, - ).pack() + ).pack(), ) - ] for setting_id in settings_strings.keys() - ] + [[ - InlineKeyboardButton( - text='🔙', - callback_data=FullMenuCallback( - action='home' - ).pack() - ) - ]] + ] + for setting_id in settings_strings.keys() + ] + [ + [ + InlineKeyboardButton( + text="🔙", callback_data=FullMenuCallback(action="home").pack() + ) + ] + ] return InlineKeyboardBuilder(buttons).as_markup() diff --git a/bot/middlewares/__init__.py b/bot/middlewares/__init__.py index ac8128d..7308169 100644 --- a/bot/middlewares/__init__.py +++ b/bot/middlewares/__init__.py @@ -1,3 +1,3 @@ +from .inject_settings import SettingsInjectorMiddleware from .private_button import PrivateButtonMiddleware from .save_chosen import SaveChosenMiddleware -from .inject_settings import SettingsInjectorMiddleware diff --git a/bot/middlewares/inject_settings.py b/bot/middlewares/inject_settings.py index eb0a120..ec9a2f7 100644 --- a/bot/middlewares/inject_settings.py +++ b/bot/middlewares/inject_settings.py @@ -1,26 +1,27 @@ +from typing import Any, Awaitable, Callable, Dict + from aiogram.dispatcher.middlewares.base import BaseMiddleware from aiogram.types import TelegramObject -from typing import Any, Awaitable, Callable, Dict - from bot.modules.settings import UserSettings class SettingsInjectorMiddleware(BaseMiddleware): async def __call__( - self, - handler: Callable[[TelegramObject, Dict[str, Any]], Awaitable[Any]], - event: TelegramObject, - data: Dict[str, Any], + self, + handler: Callable[[TelegramObject, Dict[str, Any]], Awaitable[Any]], + event: TelegramObject, + data: Dict[str, Any], ): - if (not hasattr(event, 'from_user') and - (not hasattr(event, 'inline_query') or event.inline_query is None)): + if not hasattr(event, "from_user") and ( + not hasattr(event, "inline_query") or event.inline_query is None + ): return await handler(event, data) - elif hasattr(event, 'inline_query') and event.inline_query is not None: + elif hasattr(event, "inline_query") and event.inline_query is not None: settings = UserSettings(event.inline_query.from_user.id) - data['settings'] = settings + data["settings"] = settings else: settings = UserSettings(event.from_user.id) - data['settings'] = settings + data["settings"] = settings return await handler(event, data) diff --git a/bot/middlewares/private_button.py b/bot/middlewares/private_button.py index a2a1743..6412dd5 100644 --- a/bot/middlewares/private_button.py +++ b/bot/middlewares/private_button.py @@ -1,19 +1,19 @@ +from typing import Any, Awaitable, Callable, Dict + from aiogram.dispatcher.middlewares.base import BaseMiddleware from aiogram.types import CallbackQuery -from typing import Any, Awaitable, Callable, Dict - from bot.modules.database import db class PrivateButtonMiddleware(BaseMiddleware): async def __call__( - self, - handler: Callable[[CallbackQuery, Dict[str, Any]], Awaitable[Any]], - event: CallbackQuery, - data: Dict[str, Any], + self, + handler: Callable[[CallbackQuery, Dict[str, Any]], Awaitable[Any]], + event: CallbackQuery, + data: Dict[str, Any], ): if event.from_user.id == db.inline[event.inline_message_id].from_user.id: return await handler(event, data) else: - await event.answer('This button is not for you') + await event.answer("This button is not for you") diff --git a/bot/middlewares/save_chosen.py b/bot/middlewares/save_chosen.py index 6a59e61..15f1c35 100644 --- a/bot/middlewares/save_chosen.py +++ b/bot/middlewares/save_chosen.py @@ -1,9 +1,9 @@ +from dataclasses import dataclass +from typing import Any, Awaitable, Callable, Dict + from aiogram.dispatcher.middlewares.base import BaseMiddleware from aiogram.types import ChosenInlineResult -from typing import Any, Awaitable, Callable, Dict -from dataclasses import dataclass - from bot.modules.database import db @@ -26,10 +26,10 @@ class SavedResult: class SaveChosenMiddleware(BaseMiddleware): async def __call__( - self, - handler: Callable[[ChosenInlineResult, Dict[str, Any]], Awaitable[Any]], - event: ChosenInlineResult, - data: Dict[str, Any], + self, + handler: Callable[[ChosenInlineResult, Dict[str, Any]], Awaitable[Any]], + event: ChosenInlineResult, + data: Dict[str, Any], ): db.inline[event.inline_message_id] = SavedResult( result_id=event.result_id, @@ -38,9 +38,9 @@ class SaveChosenMiddleware(BaseMiddleware): first_name=event.from_user.first_name, last_name=event.from_user.last_name, username=event.from_user.username, - language_code=event.from_user.language_code + language_code=event.from_user.language_code, ), query=event.query, - inline_message_id=event.inline_message_id + inline_message_id=event.inline_message_id, ) return await handler(event, data) diff --git a/bot/modules/common/song/song.py b/bot/modules/common/song/song.py index b2f822f..3ce9d13 100644 --- a/bot/modules/common/song/song.py +++ b/bot/modules/common/song/song.py @@ -11,7 +11,7 @@ class BaseSongItem: @property def all_artists(self): - return ', '.join(self.artists) + return ", ".join(self.artists) @property def full_name(self): diff --git a/bot/modules/database/__init__.py b/bot/modules/database/__init__.py index f54bf8c..0d1f9fc 100644 --- a/bot/modules/database/__init__.py +++ b/bot/modules/database/__init__.py @@ -1,6 +1,5 @@ from .db import Db - db = Db() -__all__ = ['db'] +__all__ = ["db"] diff --git a/bot/modules/database/db.py b/bot/modules/database/db.py index 0484ddc..b5fc144 100644 --- a/bot/modules/database/db.py +++ b/bot/modules/database/db.py @@ -3,13 +3,13 @@ from .db_model import DBDict class Db(object): def __init__(self): - self.fsm = DBDict('fsm') - self.config = DBDict('config') - self.inline = DBDict('inline') - self.errors = DBDict('errors') - self.settings = DBDict('settings') - self.spotify = DBDict('spotify') - self.deezer = DBDict('deezer') - self.youtube = DBDict('youtube') - self.soundcloud = DBDict('soundcloud') - self.recoded = DBDict('recoded') + self.fsm = DBDict("fsm") + self.config = DBDict("config") + self.inline = DBDict("inline") + self.errors = DBDict("errors") + self.settings = DBDict("settings") + self.spotify = DBDict("spotify") + self.deezer = DBDict("deezer") + self.youtube = DBDict("youtube") + self.soundcloud = DBDict("soundcloud") + self.recoded = DBDict("recoded") diff --git a/bot/modules/database/db_model.py b/bot/modules/database/db_model.py index 87133dc..23fa2b0 100644 --- a/bot/modules/database/db_model.py +++ b/bot/modules/database/db_model.py @@ -1,4 +1,5 @@ from sqlitedict import SqliteDict + from bot.utils.config import config diff --git a/bot/modules/deezer/__init__.py b/bot/modules/deezer/__init__.py index 53e4f3c..01bf107 100644 --- a/bot/modules/deezer/__init__.py +++ b/bot/modules/deezer/__init__.py @@ -1,10 +1,10 @@ -from .deezer import Deezer -from .downloader import DeezerBytestream from bot.utils.config import config +from .deezer import Deezer +from .downloader import DeezerBytestream deezer = Deezer( arl=config.tokens.deezer.arl, ) -__all__ = ['deezer', 'DeezerBytestream'] +__all__ = ["deezer", "DeezerBytestream"] diff --git a/bot/modules/deezer/deezer.py b/bot/modules/deezer/deezer.py index d117b07..0997b07 100644 --- a/bot/modules/deezer/deezer.py +++ b/bot/modules/deezer/deezer.py @@ -1,9 +1,9 @@ import asyncio -from .song import Songs -from .engine import DeezerEngine -from .driver import DeezerDriver from .downloader import DownloaderBuilder +from .driver import DeezerDriver +from .engine import DeezerEngine +from .song import Songs class Deezer(object): diff --git a/bot/modules/deezer/downloader.py b/bot/modules/deezer/downloader.py index ab24c3a..fd48a52 100644 --- a/bot/modules/deezer/downloader.py +++ b/bot/modules/deezer/downloader.py @@ -1,12 +1,11 @@ -from attrs import define - from io import BytesIO -from .driver import DeezerDriver +from attrs import define from . import track_formats -from .util import UrlDecrypter, ChunkDecrypter +from .driver import DeezerDriver from .song import FullSongItem +from .util import ChunkDecrypter, UrlDecrypter @define @@ -17,10 +16,7 @@ class DeezerBytestream: @classmethod def from_bytestream( - cls, - bytestream: BytesIO, - filename: str, - full_song: FullSongItem + cls, bytestream: BytesIO, filename: str, full_song: FullSongItem ): bytestream.seek(0) return cls( @@ -38,21 +34,18 @@ class Downloader: song: FullSongItem @classmethod - async def build( - cls, - song_id: str, - driver: DeezerDriver - ): + async def build(cls, song_id: str, driver: DeezerDriver): track = await driver.reverse_get_track(song_id) try: return cls( song_id=str(song_id), driver=driver, - track=track['results'], - song=await FullSongItem.from_deezer(track) + track=track["results"], + song=await FullSongItem.from_deezer(track), ) except KeyError: from icecream import ic + ic(track) await driver.renew_engine() return await cls.build(song_id, driver) @@ -65,7 +58,7 @@ class Downloader: audio = BytesIO() async for chunk in self.driver.engine.get_data_iter( - await self._get_download_url(quality=quality) + await self._get_download_url(quality=quality) ): if i % 3 > 0 or len(chunk) < 2 * 1024: audio.write(chunk) @@ -76,18 +69,16 @@ class Downloader: return DeezerBytestream.from_bytestream( filename=self.song.full_name + track_formats.TRACK_FORMAT_MAP[quality].ext, bytestream=audio, - full_song=self.song + full_song=self.song, ) - async def _get_download_url(self, quality: str = 'MP3_128'): + async def _get_download_url(self, quality: str = "MP3_128"): md5_origin = self.track["MD5_ORIGIN"] track_id = self.track["SNG_ID"] media_version = self.track["MEDIA_VERSION"] url_decrypter = UrlDecrypter( - md5_origin=md5_origin, - track_id=track_id, - media_version=media_version + md5_origin=md5_origin, track_id=track_id, media_version=media_version ) return url_decrypter.get_url_for(track_formats.TRACK_FORMAT_MAP[quality]) @@ -98,7 +89,4 @@ class DownloaderBuilder: driver: DeezerDriver async def from_id(self, song_id: str): - return await Downloader.build( - song_id=song_id, - driver=self.driver - ) + return await Downloader.build(song_id=song_id, driver=self.driver) diff --git a/bot/modules/deezer/driver.py b/bot/modules/deezer/driver.py index 5b3aa15..19a4e9f 100644 --- a/bot/modules/deezer/driver.py +++ b/bot/modules/deezer/driver.py @@ -1,7 +1,6 @@ from attrs import define from .engine import DeezerEngine - from .util import clean_query @@ -10,30 +9,19 @@ class DeezerDriver: engine: DeezerEngine async def get_track(self, track_id: int | str): - data = await self.engine.call_legacy_api( - f'track/{track_id}' - ) + data = await self.engine.call_legacy_api(f"track/{track_id}") return data async def reverse_get_track(self, track_id: str): - return await self.engine.call_api( - 'song.getData', - params={ - 'SNG_ID': track_id - } - ) + return await self.engine.call_api("song.getData", params={"SNG_ID": track_id}) async def search(self, query: str, limit: int = 30): data = await self.engine.call_legacy_api( - 'search/track', - params={ - 'q': clean_query(query), - 'limit': limit - } + "search/track", params={"q": clean_query(query), "limit": limit} ) - return data['data'] + return data["data"] async def renew_engine(self): self.engine = await self.engine.from_arl(self.engine.arl) diff --git a/bot/modules/deezer/engine.py b/bot/modules/deezer/engine.py index 08eaeec..cb42c81 100644 --- a/bot/modules/deezer/engine.py +++ b/bot/modules/deezer/engine.py @@ -1,19 +1,16 @@ import aiohttp - from aiohttp import ClientResponse - from attrs import define - HTTP_HEADERS = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " - "(KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36", + "(KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36", "Content-Language": "en-US", "Cache-Control": "max-age=0", "Accept": "*/*", "Accept-Charset": "utf-8,ISO-8859-1;q=0.7,*;q=0.3", "Accept-Language": "en-US,en;q=0.9,en-US;q=0.8,en;q=0.7", - "Connection": 'keep-alive' + "Connection": "keep-alive", } @@ -25,28 +22,22 @@ class DeezerEngine: @classmethod async def from_arl(cls, arl: str): - cookies = {'arl': arl} + cookies = {"arl": arl} data, cookies = await cls(cookies).call_api( - 'deezer.getUserData', get_cookies=True + "deezer.getUserData", get_cookies=True ) - data = data['results'] - token = data['checkForm'] + data = data["results"] + token = data["checkForm"] - return cls( - cookies=cookies, - arl=arl, - token=token - ) + return cls(cookies=cookies, arl=arl, token=token) - async def call_legacy_api( - self, request_point: str, params: dict = None - ): + async def call_legacy_api(self, request_point: str, params: dict = None): async with aiohttp.ClientSession(cookies=self.cookies) as session: async with session.get( - f"https://api.deezer.com/{request_point}", - params=params, - headers=HTTP_HEADERS + f"https://api.deezer.com/{request_point}", + params=params, + headers=HTTP_HEADERS, ) as r: return await r.json() @@ -63,31 +54,26 @@ class DeezerEngine: async def get_data_iter(self, url: str): async with aiohttp.ClientSession( - cookies=self.cookies, - headers=HTTP_HEADERS + cookies=self.cookies, headers=HTTP_HEADERS ) as session: - r = await session.get( - url, - allow_redirects=True - ) + r = await session.get(url, allow_redirects=True) async for chunk in self._iter_exact_chunks(r): yield chunk async def call_api( - self, method: str, params: dict = None, - get_cookies: bool = False + self, method: str, params: dict = None, get_cookies: bool = False ): async with aiohttp.ClientSession(cookies=self.cookies) as session: async with session.post( - f"https://www.deezer.com/ajax/gw-light.php", - params={ - 'method': method, - 'api_version': '1.0', - 'input': '3', - 'api_token': self.token or 'null', - }, - headers=HTTP_HEADERS, - json=params + f"https://www.deezer.com/ajax/gw-light.php", + params={ + "method": method, + "api_version": "1.0", + "input": "3", + "api_token": self.token or "null", + }, + headers=HTTP_HEADERS, + json=params, ) as r: if not get_cookies: return await r.json() diff --git a/bot/modules/deezer/song.py b/bot/modules/deezer/song.py index 9954466..ac9c912 100644 --- a/bot/modules/deezer/song.py +++ b/bot/modules/deezer/song.py @@ -1,8 +1,7 @@ from attrs import define -from .driver import DeezerDriver - from ..common.song import BaseSongItem +from .driver import DeezerDriver @define @@ -10,11 +9,11 @@ class SongItem(BaseSongItem): @classmethod def from_deezer(cls, song_item: dict): return cls( - name=song_item['title'], - id=str(song_item['id']), - artists=[song_item['artist']['name']], - preview_url=song_item.get('preview'), - thumbnail=song_item['album']['cover_medium'] + name=song_item["title"], + id=str(song_item["id"]), + artists=[song_item["artist"]["name"]], + preview_url=song_item.get("preview"), + thumbnail=song_item["album"]["cover_medium"], ) @@ -25,21 +24,23 @@ class FullSongItem(BaseSongItem): @classmethod async def from_deezer(cls, song_item: dict): - if song_item.get('results'): - song_item = song_item['results'] + if song_item.get("results"): + song_item = song_item["results"] return cls( - name=song_item['SNG_TITLE'], - id=song_item['SNG_ID'], - artists=[artist['ART_NAME'] for artist in song_item['ARTISTS']], - preview_url=(song_item.get('MEDIA').get('HREF') - if type(song_item.get('MEDIA')) is dict and - song_item.get('MEDIA').get('TYPE') == 'preview' - else None), - thumbnail=f'https://e-cdns-images.dzcdn.net/images/cover/' - f'{song_item["ALB_PICTURE"]}/320x320.jpg', - duration=int(song_item['DURATION']), - track_dict=song_item + name=song_item["SNG_TITLE"], + id=song_item["SNG_ID"], + artists=[artist["ART_NAME"] for artist in song_item["ARTISTS"]], + preview_url=( + song_item.get("MEDIA").get("HREF") + if type(song_item.get("MEDIA")) is dict + and song_item.get("MEDIA").get("TYPE") == "preview" + else None + ), + thumbnail=f"https://e-cdns-images.dzcdn.net/images/cover/" + f'{song_item["ALB_PICTURE"]}/320x320.jpg', + duration=int(song_item["DURATION"]), + track_dict=song_item, ) diff --git a/bot/modules/deezer/track_formats.py b/bot/modules/deezer/track_formats.py index 700eaee..7c5e3e6 100644 --- a/bot/modules/deezer/track_formats.py +++ b/bot/modules/deezer/track_formats.py @@ -19,32 +19,11 @@ class TrackFormat: TRACK_FORMAT_MAP = { - FLAC: TrackFormat( - code=9, - ext=".flac" - ), - MP3_128: TrackFormat( - code=1, - ext=".mp3" - ), - MP3_256: TrackFormat( - code=5, - ext=".mp3" - ), - MP3_320: TrackFormat( - code=3, - ext=".mp3" - ), - MP4_RA1: TrackFormat( - code=13, - ext=".mp4" - ), - MP4_RA2: TrackFormat( - code=14, - ext=".mp4" - ), - MP4_RA3: TrackFormat( - code=15, - ext=".mp3" - ) + FLAC: TrackFormat(code=9, ext=".flac"), + MP3_128: TrackFormat(code=1, ext=".mp3"), + MP3_256: TrackFormat(code=5, ext=".mp3"), + MP3_320: TrackFormat(code=3, ext=".mp3"), + MP4_RA1: TrackFormat(code=13, ext=".mp4"), + MP4_RA2: TrackFormat(code=14, ext=".mp4"), + MP4_RA3: TrackFormat(code=15, ext=".mp3"), } diff --git a/bot/modules/deezer/util.py b/bot/modules/deezer/util.py index 14417dc..d534614 100644 --- a/bot/modules/deezer/util.py +++ b/bot/modules/deezer/util.py @@ -1,12 +1,11 @@ # https://pypi.org/project/music-helper/ -import warnings -import re import hashlib - -from cryptography.hazmat.backends import default_backend -from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes +import re +import warnings from attrs import define +from cryptography.hazmat.backends import default_backend +from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes from .track_formats import TrackFormat @@ -34,20 +33,20 @@ class UrlDecrypter: media_version: str def get_url_for(self, track_format: TrackFormat): - step1 = (f'{self.md5_origin}¤{track_format.code}¤' - f'{self.track_id}¤{self.media_version}') + step1 = ( + f"{self.md5_origin}¤{track_format.code}¤" + f"{self.track_id}¤{self.media_version}" + ) m = hashlib.md5() m.update(bytes([ord(x) for x in step1])) - step2 = f'{m.hexdigest()}¤{step1}¤' + step2 = f"{m.hexdigest()}¤{step1}¤" step2 = step2.ljust(80, " ") cipher = Cipher( - algorithm=algorithms.AES( - key=bytes('jo6aey6haid2Teih', 'ascii') - ), + algorithm=algorithms.AES(key=bytes("jo6aey6haid2Teih", "ascii")), mode=modes.ECB(), - backend=default_backend() + backend=default_backend(), ) encryptor = cipher.encryptor() @@ -55,7 +54,7 @@ class UrlDecrypter: cdn = self.md5_origin[0] - return f'https://e-cdns-proxy-{cdn}.dzcdn.net/mobile/1/{step3}' + return f"https://e-cdns-proxy-{cdn}.dzcdn.net/mobile/1/{step3}" @define @@ -69,12 +68,10 @@ class ChunkDecrypter: cipher = Cipher( algorithms.Blowfish(get_blowfish_key(track_id)), modes.CBC(bytes([i for i in range(8)])), - default_backend() + default_backend(), ) - return cls( - cipher=cipher - ) + return cls(cipher=cipher) def decrypt_chunk(self, chunk: bytes): decryptor = self.cipher.decryptor() @@ -82,7 +79,7 @@ class ChunkDecrypter: def get_blowfish_key(track_id: str): - secret = 'g4el58wc0zvf9na1' + secret = "g4el58wc0zvf9na1" m = hashlib.md5() m.update(bytes([ord(x) for x in track_id])) diff --git a/bot/modules/error/__init__.py b/bot/modules/error/__init__.py index f7c0612..f2620b5 100644 --- a/bot/modules/error/__init__.py +++ b/bot/modules/error/__init__.py @@ -1,2 +1,2 @@ -from .handler import on_error, Error +from .handler import Error, on_error from .pretty import PrettyException diff --git a/bot/modules/error/handler.py b/bot/modules/error/handler.py index 93a481d..ef51d5f 100644 --- a/bot/modules/error/handler.py +++ b/bot/modules/error/handler.py @@ -1,14 +1,14 @@ -from bot.common import console -from aiogram.types.error_event import ErrorEvent +from dataclasses import dataclass + from aiogram import Bot from aiogram.dispatcher import router as s_router - +from aiogram.types.error_event import ErrorEvent from rich.traceback import Traceback -from .pretty import PrettyException +from bot.common import console from bot.modules.database import db -from dataclasses import dataclass +from .pretty import PrettyException @dataclass @@ -19,8 +19,8 @@ class Error: async def on_error(event: ErrorEvent, bot: Bot): - import os import base64 + import os error_id = base64.urlsafe_b64encode(os.urandom(6)).decode() @@ -42,9 +42,9 @@ async def on_error(event: ErrorEvent, bot: Bot): await bot.edit_message_caption( inline_message_id=event.update.chosen_inline_result.inline_message_id, - caption=f'💔 ERROR occurred. Use this code to get more information: ' - f'{error_id}', - parse_mode='HTML', + caption=f"💔 ERROR occurred. Use this code to get more information: " + f"{error_id}", + parse_mode="HTML", ) else: @@ -53,7 +53,7 @@ async def on_error(event: ErrorEvent, bot: Bot): exception=pretty_exception, ) - console.print(f'[red]{error_id} occurred[/]') + console.print(f"[red]{error_id} occurred[/]") console.print(event) console.print(traceback) - console.print(f'-{error_id} occurred-') + console.print(f"-{error_id} occurred-") diff --git a/bot/modules/error/pretty.py b/bot/modules/error/pretty.py index a65337a..8d490aa 100644 --- a/bot/modules/error/pretty.py +++ b/bot/modules/error/pretty.py @@ -1,7 +1,7 @@ -import os -import traceback import contextlib +import os import re +import traceback from typing import Type @@ -13,12 +13,14 @@ class PrettyException: 🐊 {e.__traceback__.tb_frame.f_code.co_filename.replace(os.getcwd(), "")}\r :{e.__traceback__.tb_frame.f_lineno} """ - self.short = (f'{e.__class__.__name__}: ' - f'{"".join(traceback.format_exception_only(e)).strip()}') + self.short = ( + f"{e.__class__.__name__}: " + f'{"".join(traceback.format_exception_only(e)).strip()}' + ) - self.pretty_exception = (f"{self.long}\n\n" - f"⬇️ Trace:" - f"{self.get_full_stack()}") + self.pretty_exception = ( + f"{self.long}\n\n" f"⬇️ Trace:" f"{self.get_full_stack()}" + ) @staticmethod def get_full_stack(): @@ -40,9 +42,11 @@ class PrettyException: full_stack = "\n".join( [ - format_line(line) - if re.search(line_regex, line) - else f"{line}" + ( + format_line(line) + if re.search(line_regex, line) + else f"{line}" + ) for line in full_stack.splitlines() ] ) diff --git a/bot/modules/fsm/in_db.py b/bot/modules/fsm/in_db.py index ba07ce8..00c5269 100644 --- a/bot/modules/fsm/in_db.py +++ b/bot/modules/fsm/in_db.py @@ -1,13 +1,10 @@ -from bot.modules.database import db from dataclasses import dataclass, field from typing import Any, DefaultDict, Dict, Optional from aiogram.fsm.state import State -from aiogram.fsm.storage.base import ( - BaseStorage, - StateType, - StorageKey, -) +from aiogram.fsm.storage.base import BaseStorage, StateType, StorageKey + +from bot.modules.database import db @dataclass @@ -18,14 +15,14 @@ class MemoryStorageRecord: class StorageDict(DefaultDict): def __init__(self, default_factory=None) -> None: - if type(db.fsm.get('fsm')) is not dict: - db.fsm['fsm'] = dict() + if type(db.fsm.get("fsm")) is not dict: + db.fsm["fsm"] = dict() - super().__init__(default_factory, db.fsm['fsm']) + super().__init__(default_factory, db.fsm["fsm"]) def __setitem__(self, key, value): super().__setitem__(key, value) - db.fsm['fsm'] = dict(self) + db.fsm["fsm"] = dict(self) class InDbStorage(BaseStorage): diff --git a/bot/modules/settings/__init__.py b/bot/modules/settings/__init__.py index 7201f5a..e2cdcae 100644 --- a/bot/modules/settings/__init__.py +++ b/bot/modules/settings/__init__.py @@ -1 +1 @@ -from .model import UserSettings, Setting, settings_strings +from .model import Setting, UserSettings, settings_strings diff --git a/bot/modules/settings/model.py b/bot/modules/settings/model.py index b601ac6..54193f0 100644 --- a/bot/modules/settings/model.py +++ b/bot/modules/settings/model.py @@ -1,4 +1,5 @@ from dataclasses import dataclass + from ..database import db @@ -11,46 +12,32 @@ class Setting: settings_strings: dict[str, Setting] = { - 'search_preview': Setting( - name='Search preview', - description='Show only covers (better display), ' - 'or add 30 seconds of track preview whenever possible?', - choices={ - 'cover': 'Cover picture', - 'preview': 'Audio preview' - }, + "search_preview": Setting( + name="Search preview", + description="Show only covers (better display), " + "or add 30 seconds of track preview whenever possible?", + choices={"cover": "Cover picture", "preview": "Audio preview"}, ), - 'recode_youtube': Setting( - name='Recode YouTube (and Spotify)', - description='Recode when downloading from YouTube (and Spotify) to ' - 'more compatible format (may take some time)', - choices={ - 'no': 'Send original file', - 'yes': 'Recode to libmp3lame' - }, + "recode_youtube": Setting( + name="Recode YouTube (and Spotify)", + description="Recode when downloading from YouTube (and Spotify) to " + "more compatible format (may take some time)", + choices={"no": "Send original file", "yes": "Recode to libmp3lame"}, ), - 'exact_spotify_search': Setting( - name='Only exact Spotify matches', - description='When searching on Youtube from Spotify, show only exact matches, ' - 'may protect against inaccurate matches, but at the same time it ' - 'can lose reuploaded tracks. Should be enabled always, except in ' - 'situations where the track is not found on both YouTube and ' - 'Deezer', - choices={ - 'yes': 'Only exact matches', - 'no': 'Fuzzy matches also' - }, + "exact_spotify_search": Setting( + name="Only exact Spotify matches", + description="When searching on Youtube from Spotify, show only exact matches, " + "may protect against inaccurate matches, but at the same time it " + "can lose reuploaded tracks. Should be enabled always, except in " + "situations where the track is not found on both YouTube and " + "Deezer", + choices={"yes": "Only exact matches", "no": "Fuzzy matches also"}, + ), + "default_search_provider": Setting( + name="Default search provider", + description="Which service to use when searching without service filter", + choices={"d": "Deezer", "c": "SoundCloud", "y": "YouTube", "s": "Spotify"}, ), - 'default_search_provider': Setting( - name='Default search provider', - description='Which service to use when searching without service filter', - choices={ - 'd': 'Deezer', - 'c': 'SoundCloud', - 'y': 'YouTube', - 's': 'Spotify' - } - ) } @@ -64,8 +51,8 @@ class UserSettings: if db.settings.get(self.user_id) is None: db.settings[self.user_id] = dict( - (setting, list(settings_strings[setting].choices)[0]) for setting in - settings_strings + (setting, list(settings_strings[setting].choices)[0]) + for setting in settings_strings ) def __getitem__(self, item): diff --git a/bot/modules/soundcloud/__init__.py b/bot/modules/soundcloud/__init__.py index da4c00d..1e6c4b7 100644 --- a/bot/modules/soundcloud/__init__.py +++ b/bot/modules/soundcloud/__init__.py @@ -1,10 +1,10 @@ -from .soundcloud import SoundCloud -from .downloader import SoundCloudBytestream from bot.utils.config import config +from .downloader import SoundCloudBytestream +from .soundcloud import SoundCloud soundcloud = SoundCloud( client_id=config.tokens.soundcloud.client_id, ) -__all__ = ['soundcloud', 'SoundCloudBytestream'] +__all__ = ["soundcloud", "SoundCloudBytestream"] diff --git a/bot/modules/soundcloud/downloader.py b/bot/modules/soundcloud/downloader.py index 15d7a29..40c5a7e 100644 --- a/bot/modules/soundcloud/downloader.py +++ b/bot/modules/soundcloud/downloader.py @@ -1,11 +1,11 @@ -from attrs import define from typing import Callable +import m3u8 +from attrs import define + from .driver import SoundCloudDriver from .song import SongItem -import m3u8 - @define class SoundCloudBytestream: @@ -15,18 +15,9 @@ class SoundCloudBytestream: song: SongItem @classmethod - def from_bytes( - cls, - bytes_: bytes, - filename: str, - duration: int, - song: SongItem - ): + def from_bytes(cls, bytes_: bytes, filename: str, duration: int, song: SongItem): return cls( - file=bytes_, - filename=filename, - duration=int(duration / 1000), - song=song + file=bytes_, filename=filename, duration=int(duration / 1000), song=song ) @@ -40,60 +31,53 @@ class Downloader: song: SongItem @classmethod - async def build( - cls, - song_id: str, - driver: SoundCloudDriver - ): + async def build(cls, song_id: str, driver: SoundCloudDriver): track = await driver.get_track(song_id) song = SongItem.from_soundcloud(track) - if url := cls._try_get_progressive(track['media']['transcodings']): + if url := cls._try_get_progressive(track["media"]["transcodings"]): method = cls._progressive else: - url = track['media']['transcodings'][0]['url'] - method = cls._hls if \ - (track['media']['transcodings'][0]['format']['protocol'] - == 'hls') else cls._progressive + url = track["media"]["transcodings"][0]["url"] + method = ( + cls._hls + if (track["media"]["transcodings"][0]["format"]["protocol"] == "hls") + else cls._progressive + ) return cls( driver=driver, - duration=track['duration'], + duration=track["duration"], method=method, download_url=url, filename=f'{track["title"]}.mp3', - song=song + song=song, ) @staticmethod def _try_get_progressive(urls: list) -> str | None: for transcode in urls: - if transcode['format']['protocol'] == 'progressive': - return transcode['url'] + if transcode["format"]["protocol"] == "progressive": + return transcode["url"] async def _progressive(self, url: str) -> bytes: return await self.driver.engine.read_data( - url=(await self.driver.engine.get( - url - ))['url'] + url=(await self.driver.engine.get(url))["url"] ) async def _hls(self, url: str) -> bytes: m3u8_obj = m3u8.loads( - (await self.driver.engine.read_data( - (await self.driver.engine.get( - url=url - ))['url'] - )).decode() + ( + await self.driver.engine.read_data( + (await self.driver.engine.get(url=url))["url"] + ) + ).decode() ) content = bytearray() for segment in m3u8_obj.files: content.extend( - await self.driver.engine.read_data( - url=segment, - append_client_id=False - ) + await self.driver.engine.read_data(url=segment, append_client_id=False) ) return content @@ -103,7 +87,7 @@ class Downloader: bytes_=await self.method(self, self.download_url), filename=self.filename, duration=self.duration, - song=self.song + song=self.song, ) @@ -112,7 +96,4 @@ class DownloaderBuilder: driver: SoundCloudDriver async def from_id(self, song_id: str): - return await Downloader.build( - song_id=song_id, - driver=self.driver - ) + return await Downloader.build(song_id=song_id, driver=self.driver) diff --git a/bot/modules/soundcloud/driver.py b/bot/modules/soundcloud/driver.py index de63309..d9b8d59 100644 --- a/bot/modules/soundcloud/driver.py +++ b/bot/modules/soundcloud/driver.py @@ -8,23 +8,12 @@ class SoundCloudDriver: engine: SoundCloudEngine async def get_track(self, track_id: int | str): - return await self.engine.call( - f'tracks/{track_id}' - ) + return await self.engine.call(f"tracks/{track_id}") async def search(self, query: str, limit: int = 30): - return (await self.engine.call( - 'search/tracks', - params={ - 'q': query, - 'limit': limit - } - ))['collection'] + return ( + await self.engine.call("search/tracks", params={"q": query, "limit": limit}) + )["collection"] async def resolve_url(self, url: str): - return await self.engine.call( - 'resolve', - params={ - 'url': url - } - ) + return await self.engine.call("resolve", params={"url": url}) diff --git a/bot/modules/soundcloud/engine.py b/bot/modules/soundcloud/engine.py index 9ce4ba1..311736e 100644 --- a/bot/modules/soundcloud/engine.py +++ b/bot/modules/soundcloud/engine.py @@ -1,5 +1,5 @@ -from attrs import define import aiohttp +from attrs import define @define @@ -8,27 +8,33 @@ class SoundCloudEngine: async def call(self, request_point: str, params: dict = None): return await self.get( - url=f'https://api-v2.soundcloud.com/{request_point}', - params=params + url=f"https://api-v2.soundcloud.com/{request_point}", params=params ) async def get(self, url: str, params: dict = None): async with aiohttp.ClientSession() as session: async with session.get( - url, - params=(params or {}) | { - 'client_id': self.client_id, - }, + url, + params=(params or {}) + | { + "client_id": self.client_id, + }, ) as r: return await r.json() - async def read_data(self, url: str, params: dict = None, - append_client_id: bool = True): + async def read_data( + self, url: str, params: dict = None, append_client_id: bool = True + ): async with aiohttp.ClientSession() as session: async with session.get( - url, - params=(params or {}) | ({ - 'client_id': self.client_id, - } if append_client_id else {}), + url, + params=(params or {}) + | ( + { + "client_id": self.client_id, + } + if append_client_id + else {} + ), ) as r: return await r.content.read() diff --git a/bot/modules/soundcloud/song.py b/bot/modules/soundcloud/song.py index 5ca7c61..0ced03a 100644 --- a/bot/modules/soundcloud/song.py +++ b/bot/modules/soundcloud/song.py @@ -9,13 +9,15 @@ class SongItem(BaseSongItem): @classmethod def from_soundcloud(cls, song_item: dict): return cls( - name=song_item['title'], - id=str(song_item['id']), + name=song_item["title"], + id=str(song_item["id"]), artists=[], - thumbnail=(song_item['artwork_url'] or song_item['user']['avatar_url'] or - 'https://soundcloud.com/images/default_avatar_large.png') - .replace('large.jpg', 't300x300.jpg'), - preview_url=None + thumbnail=( + song_item["artwork_url"] + or song_item["user"]["avatar_url"] + or "https://soundcloud.com/images/default_avatar_large.png" + ).replace("large.jpg", "t300x300.jpg"), + preview_url=None, ) @property diff --git a/bot/modules/soundcloud/soundcloud.py b/bot/modules/soundcloud/soundcloud.py index 03cb8e9..86d3599 100644 --- a/bot/modules/soundcloud/soundcloud.py +++ b/bot/modules/soundcloud/soundcloud.py @@ -1,7 +1,7 @@ -from .engine import SoundCloudEngine -from .driver import SoundCloudDriver -from .song import Songs from .downloader import DownloaderBuilder +from .driver import SoundCloudDriver +from .engine import SoundCloudEngine +from .song import Songs class SoundCloud(object): diff --git a/bot/modules/spotify/__init__.py b/bot/modules/spotify/__init__.py index 2b403d4..44103dd 100644 --- a/bot/modules/spotify/__init__.py +++ b/bot/modules/spotify/__init__.py @@ -1,10 +1,10 @@ -from .spotify import Spotify from bot.utils.config import config +from .spotify import Spotify spotify = Spotify( client_id=config.tokens.spotify.client_id, - client_secret=config.tokens.spotify.client_secret + client_secret=config.tokens.spotify.client_secret, ) -__all__ = ['spotify'] +__all__ = ["spotify"] diff --git a/bot/modules/spotify/song.py b/bot/modules/spotify/song.py index fcd453b..4f56974 100644 --- a/bot/modules/spotify/song.py +++ b/bot/modules/spotify/song.py @@ -1,5 +1,5 @@ -from attrs import define import spotipy +from attrs import define from ..common.song import BaseSongItem @@ -9,12 +9,15 @@ class SongItem(BaseSongItem): @classmethod def from_spotify(cls, song_item: dict): return cls( - name=song_item['name'], - id=song_item['id'], - artists=[artist['name'] for artist in song_item['artists']], - preview_url=song_item['preview_url'].split('?')[0] if - song_item['preview_url'] is not None else None, - thumbnail=song_item['album']['images'][1]['url'] + name=song_item["name"], + id=song_item["id"], + artists=[artist["name"] for artist in song_item["artists"]], + preview_url=( + song_item["preview_url"].split("?")[0] + if song_item["preview_url"] is not None + else None + ), + thumbnail=song_item["album"]["images"][1]["url"], ) @@ -28,7 +31,7 @@ class Songs(object): if r is None: return None - return [SongItem.from_spotify(item) for item in r['tracks']['items']] + return [SongItem.from_spotify(item) for item in r["tracks"]["items"]] def from_id(self, song_id: str) -> SongItem | None: r = self.spotify.track(song_id) diff --git a/bot/modules/spotify/spotify.py b/bot/modules/spotify/spotify.py index bde36cf..3b2aa11 100644 --- a/bot/modules/spotify/spotify.py +++ b/bot/modules/spotify/spotify.py @@ -8,11 +8,10 @@ class Spotify(object): def __init__(self, client_id, client_secret): self.spotify = spotipy.Spotify( client_credentials_manager=SpotifyClientCredentials( - client_id=client_id, - client_secret=client_secret + client_id=client_id, client_secret=client_secret ), backoff_factor=0.1, - retries=10 + retries=10, ) self.songs = Songs(self.spotify) diff --git a/bot/modules/url/__init__.py b/bot/modules/url/__init__.py index 38ad1dc..b1ec77b 100644 --- a/bot/modules/url/__init__.py +++ b/bot/modules/url/__init__.py @@ -1,2 +1,2 @@ -from .recognise import recognise_music_service, RecognisedService from .id_getter import get_id +from .recognise import RecognisedService, recognise_music_service diff --git a/bot/modules/url/id_getter.py b/bot/modules/url/id_getter.py index 21b0fc3..e6b8028 100644 --- a/bot/modules/url/id_getter.py +++ b/bot/modules/url/id_getter.py @@ -1,7 +1,7 @@ -from .recognise import RecognisedService - import aiohttp +from .recognise import RecognisedService + async def get_url_after_redirect(url: str) -> str: async with aiohttp.ClientSession() as session: @@ -10,26 +10,28 @@ async def get_url_after_redirect(url: str) -> str: async def get_id(recognised: RecognisedService): - if recognised.name == 'yt': - return recognised.parse_result.path.replace('/', '') if ( - recognised.parse_result.netloc.endswith('youtu.be') - ) else recognised.parse_result.query.split('=')[1].split('&')[0] + if recognised.name == "yt": + return ( + recognised.parse_result.path.replace("/", "") + if (recognised.parse_result.netloc.endswith("youtu.be")) + else recognised.parse_result.query.split("=")[1].split("&")[0] + ) - elif recognised.name == 'spot': - if recognised.parse_result.netloc.endswith('open.spotify.com'): - return recognised.parse_result.path.split('/')[2] + elif recognised.name == "spot": + if recognised.parse_result.netloc.endswith("open.spotify.com"): + return recognised.parse_result.path.split("/")[2] else: url = await get_url_after_redirect(recognised.parse_result.geturl()) - return url.split('/')[-1].split('?')[0] + return url.split("/")[-1].split("?")[0] - elif recognised.name == 'deez': - if recognised.parse_result.netloc.endswith('deezer.com'): - return recognised.parse_result.path.split('/')[-1] + elif recognised.name == "deez": + if recognised.parse_result.netloc.endswith("deezer.com"): + return recognised.parse_result.path.split("/")[-1] else: url = await get_url_after_redirect(recognised.parse_result.geturl()) - return url.split('/')[-1].split('?')[0] + return url.split("/")[-1].split("?")[0] - elif recognised.name == 'sc': - if not recognised.parse_result.netloc.startswith('on'): + elif recognised.name == "sc": + if not recognised.parse_result.netloc.startswith("on"): return recognised.parse_result.geturl() return await get_url_after_redirect(recognised.parse_result.geturl()) diff --git a/bot/modules/url/recognise.py b/bot/modules/url/recognise.py index 4be60d7..f42c023 100644 --- a/bot/modules/url/recognise.py +++ b/bot/modules/url/recognise.py @@ -1,20 +1,18 @@ -from urllib.parse import urlparse, ParseResult from dataclasses import dataclass - -from typing import Callable, Awaitable, Literal +from typing import Awaitable, Callable, Literal +from urllib.parse import ParseResult, urlparse from bot.modules.database import db from bot.modules.database.db import DBDict - -from bot.modules.youtube import youtube -from bot.modules.spotify import spotify from bot.modules.deezer import deezer from bot.modules.soundcloud import soundcloud +from bot.modules.spotify import spotify +from bot.modules.youtube import youtube @dataclass class RecognisedService: - name: Literal['yt', 'spot', 'deez', 'sc'] + name: Literal["yt", "spot", "deez", "sc"] db_table: DBDict by_id_func: Callable | Awaitable parse_result: ParseResult @@ -22,33 +20,33 @@ class RecognisedService: def recognise_music_service(url: str) -> RecognisedService | None: url = urlparse(url) - if url.netloc.endswith('youtube.com') or url.netloc.endswith('youtu.be'): + if url.netloc.endswith("youtube.com") or url.netloc.endswith("youtu.be"): return RecognisedService( - name='yt', + name="yt", db_table=db.youtube, by_id_func=youtube.songs.from_id, - parse_result=url + parse_result=url, ) - elif url.netloc.endswith('open.spotify.com') or url.netloc.endswith('spotify.link'): + elif url.netloc.endswith("open.spotify.com") or url.netloc.endswith("spotify.link"): return RecognisedService( - name='spot', + name="spot", db_table=db.spotify, by_id_func=spotify.songs.from_id, - parse_result=url + parse_result=url, ) - elif url.netloc.endswith('deezer.page.link') or url.netloc.endswith('deezer.com'): + elif url.netloc.endswith("deezer.page.link") or url.netloc.endswith("deezer.com"): return RecognisedService( - name='deez', + name="deez", db_table=db.deezer, by_id_func=deezer.songs.from_id, - parse_result=url + parse_result=url, ) - elif url.netloc.endswith('soundcloud.com'): + elif url.netloc.endswith("soundcloud.com"): return RecognisedService( - name='sc', + name="sc", db_table=db.soundcloud, by_id_func=soundcloud.songs.from_url, - parse_result=url + parse_result=url, ) else: return None diff --git a/bot/modules/youtube/__init__.py b/bot/modules/youtube/__init__.py index 47e992e..50a6e00 100644 --- a/bot/modules/youtube/__init__.py +++ b/bot/modules/youtube/__init__.py @@ -1,8 +1,8 @@ -from .youtube import YouTube -from pytube.exceptions import AgeRestrictedError +from pytubefix.exceptions import AgeRestrictedError +from .youtube import YouTube youtube = YouTube() -__all__ = ['youtube', 'AgeRestrictedError'] +__all__ = ["youtube", "AgeRestrictedError"] diff --git a/bot/modules/youtube/downloader.py b/bot/modules/youtube/downloader.py index bfd3ce6..4ce6875 100644 --- a/bot/modules/youtube/downloader.py +++ b/bot/modules/youtube/downloader.py @@ -1,12 +1,10 @@ -from attrs import define -from pytube import YouTube, Stream - -from pydub import AudioSegment +import asyncio +from concurrent.futures import ThreadPoolExecutor from io import BytesIO -from concurrent.futures import ThreadPoolExecutor - -import asyncio +from attrs import define +from pydub import AudioSegment +from pytubefix import Stream, YouTube @define @@ -16,12 +14,7 @@ class YouTubeBytestream: duration: int @classmethod - def from_bytestream( - cls, - bytestream: BytesIO, - filename: str, - duration: float - ): + def from_bytestream(cls, bytestream: BytesIO, filename: str, duration: float): bytestream.seek(0) return cls( file=bytestream.read(), @@ -30,11 +23,9 @@ class YouTubeBytestream: ) def __rerender(self): - segment = AudioSegment.from_file( - file=BytesIO(self.file) - ) + segment = AudioSegment.from_file(file=BytesIO(self.file)) - self.file = segment.export(BytesIO(), format='mp3', codec='libmp3lame').read() + self.file = segment.export(BytesIO(), format="mp3", codec="libmp3lame").read() return self async def rerender(self): @@ -54,13 +45,18 @@ class Downloader: def from_id(cls, yt_id: str): video = YouTube.from_id(yt_id) - audio_stream = video.streams.filter( - only_audio=True, - ).order_by('abr').desc().first() + audio_stream = ( + video.streams.filter( + only_audio=True, + ) + .order_by("abr") + .desc() + .first() + ) return cls( audio_stream=audio_stream, - filename=f'{audio_stream.default_filename}.mp3', + filename=f"{audio_stream.default_filename}.mp3", duration=int(video.length), ) diff --git a/bot/modules/youtube/song.py b/bot/modules/youtube/song.py index 7eca20c..35ab56a 100644 --- a/bot/modules/youtube/song.py +++ b/bot/modules/youtube/song.py @@ -1,11 +1,10 @@ -from attrs import define -import ytmusicapi - -from .downloader import Downloader, YouTubeBytestream - from typing import Awaitable +import ytmusicapi +from attrs import define + from ..common.song import BaseSongItem +from .downloader import Downloader, YouTubeBytestream @define @@ -15,19 +14,19 @@ class SongItem(BaseSongItem): @classmethod def from_youtube(cls, song_item: dict): return cls( - name=song_item['title'], - id=song_item['videoId'], - artists=[artist['name'] for artist in song_item['artists']], - thumbnail=song_item['thumbnails'][1]['url'] + name=song_item["title"], + id=song_item["videoId"], + artists=[artist["name"] for artist in song_item["artists"]], + thumbnail=song_item["thumbnails"][1]["url"], ) @classmethod def from_details(cls, details: dict): return cls( - name=details['title'], - id=details['videoId'], - artists=details['author'].split(' & '), - thumbnail=details['thumbnail']['thumbnails'][1]['url'] + name=details["title"], + id=details["videoId"], + artists=details["author"].split(" & "), + thumbnail=details["thumbnail"]["thumbnails"][1]["url"], ) def to_bytestream(self) -> Awaitable[YouTubeBytestream]: @@ -39,16 +38,10 @@ class Songs(object): ytm: ytmusicapi.YTMusic def search( - self, - query: str, - limit: int = 10, - exact_match: bool = False + self, query: str, limit: int = 10, exact_match: bool = False ) -> list[SongItem] | None: r = self.ytm.search( - query, - limit=limit, - filter='songs', - ignore_spelling=exact_match + query, limit=limit, filter="songs", ignore_spelling=exact_match ) if r is None: @@ -68,4 +61,4 @@ class Songs(object): if r is None: return None - return SongItem.from_details(r['videoDetails']) + return SongItem.from_details(r["videoDetails"]) diff --git a/bot/modules/youtube/youtube.py b/bot/modules/youtube/youtube.py index 80ddbfd..0c1a0af 100644 --- a/bot/modules/youtube/youtube.py +++ b/bot/modules/youtube/youtube.py @@ -1,7 +1,7 @@ import ytmusicapi -from .song import Songs from .downloader import Downloader +from .song import Songs class YouTube(object): @@ -9,6 +9,4 @@ class YouTube(object): self.ytm = ytmusicapi.YTMusic() self.download = Downloader - self.songs = Songs( - self.ytm - ) + self.songs = Songs(self.ytm) diff --git a/bot/results/common/search.py b/bot/results/common/search.py index ec305ad..379342a 100644 --- a/bot/results/common/search.py +++ b/bot/results/common/search.py @@ -1,46 +1,48 @@ +from typing import TypeVar + from aiogram.types import ( - InlineQueryResultDocument, InlineQueryResultCachedAudio, - InlineKeyboardMarkup, InlineKeyboardButton + InlineKeyboardButton, + InlineKeyboardMarkup, + InlineQueryResultCachedAudio, + InlineQueryResultDocument, ) +from bot.modules.common.song import BaseSongItem from bot.modules.database.db import DBDict from bot.modules.settings import UserSettings -from bot.modules.common.song import BaseSongItem -from typing import TypeVar - - -BaseSongT = TypeVar('BaseSongT', bound=BaseSongItem) +BaseSongT = TypeVar("BaseSongT", bound=BaseSongItem) async def get_common_search_result( - audio: BaseSongT, - db_table: DBDict, - service_id: str, - settings: UserSettings + audio: BaseSongT, db_table: DBDict, service_id: str, settings: UserSettings ) -> InlineQueryResultDocument | InlineQueryResultCachedAudio: return ( InlineQueryResultDocument( - id=f'{service_id}::' + audio.id, + id=f"{service_id}::" + audio.id, title=audio.name, description=audio.all_artists, thumb_url=audio.thumbnail, - document_url=(audio.preview_url or audio.thumbnail) if - settings['search_preview'].value == 'preview' else audio.thumbnail, - mime_type='application/zip', + document_url=( + (audio.preview_url or audio.thumbnail) + if settings["search_preview"].value == "preview" + else audio.thumbnail + ), + mime_type="application/zip", reply_markup=InlineKeyboardMarkup( inline_keyboard=[ - [InlineKeyboardButton(text='Downloading...', callback_data='.')] + [InlineKeyboardButton(text="Downloading...", callback_data=".")] ] ), caption=audio.full_name, - ) if audio.id not in list(db_table.keys()) else - InlineQueryResultCachedAudio( - id=f'{service_id}c::' + audio.id, + ) + if audio.id not in list(db_table.keys()) + else InlineQueryResultCachedAudio( + id=f"{service_id}c::" + audio.id, audio_file_id=db_table[audio.id], reply_markup=InlineKeyboardMarkup( inline_keyboard=[ - [InlineKeyboardButton(text='Verifying...', callback_data='.')] + [InlineKeyboardButton(text="Verifying...", callback_data=".")] ] ), ) diff --git a/bot/results/deezer/__init__.py b/bot/results/deezer/__init__.py index af09ab9..22c11a5 100644 --- a/bot/results/deezer/__init__.py +++ b/bot/results/deezer/__init__.py @@ -1,4 +1,3 @@ from .search import get_deezer_search_results - -__all__ = ['get_deezer_search_results'] +__all__ = ["get_deezer_search_results"] diff --git a/bot/results/deezer/search.py b/bot/results/deezer/search.py index 3019f36..6412e41 100644 --- a/bot/results/deezer/search.py +++ b/bot/results/deezer/search.py @@ -1,23 +1,18 @@ -from aiogram.types import ( - InlineQueryResultDocument, InlineQueryResultCachedAudio -) +from aiogram.types import InlineQueryResultCachedAudio, InlineQueryResultDocument -from bot.modules.deezer import deezer from bot.modules.database import db +from bot.modules.deezer import deezer from bot.modules.settings import UserSettings from ..common.search import get_common_search_result -async def get_deezer_search_results(query: str, settings: UserSettings) -> list[ - InlineQueryResultDocument | InlineQueryResultCachedAudio -]: +async def get_deezer_search_results( + query: str, settings: UserSettings +) -> list[InlineQueryResultDocument | InlineQueryResultCachedAudio]: return [ await get_common_search_result( - audio=audio, - db_table=db.deezer, - service_id='deez', - settings=settings + audio=audio, db_table=db.deezer, service_id="deez", settings=settings ) for audio in await deezer.songs.search(query, limit=50) ] diff --git a/bot/results/error/error.py b/bot/results/error/error.py index 4799792..df05039 100644 --- a/bot/results/error/error.py +++ b/bot/results/error/error.py @@ -1,31 +1,31 @@ -from aiogram.types import ( - InlineQueryResultArticle, InputTextMessageContent, -) +from aiogram.types import InlineQueryResultArticle, InputTextMessageContent +from bot.common import console from bot.modules.database import db from bot.modules.error import Error -from bot.common import console - -async def get_error_search_results(error_id: str) -> (list[InlineQueryResultArticle] - | None): +async def get_error_search_results( + error_id: str, +) -> list[InlineQueryResultArticle] | None: error: Error = db.errors.get(error_id) if error is None: return [] - console.print(f'{error_id} requested') + console.print(f"{error_id} requested") console.print(error.traceback) - console.print(f'-{error_id} requested-') + console.print(f"-{error_id} requested-") - return [( - InlineQueryResultArticle( - id=error_id, - title=f'Error {error_id}', - description=error.exception.short, - input_message_content=InputTextMessageContent( - message_text=error.exception.long, - parse_mode='HTML', - ), + return [ + ( + InlineQueryResultArticle( + id=error_id, + title=f"Error {error_id}", + description=error.exception.short, + input_message_content=InputTextMessageContent( + message_text=error.exception.long, + parse_mode="HTML", + ), + ) ) - )] + ] diff --git a/bot/results/soundcloud/__init__.py b/bot/results/soundcloud/__init__.py index ea67cc2..095f38b 100644 --- a/bot/results/soundcloud/__init__.py +++ b/bot/results/soundcloud/__init__.py @@ -1,6 +1,3 @@ from .search import get_soundcloud_search_results - -__all__ = [ - 'get_soundcloud_search_results' -] +__all__ = ["get_soundcloud_search_results"] diff --git a/bot/results/soundcloud/search.py b/bot/results/soundcloud/search.py index 8c6b662..8c3840d 100644 --- a/bot/results/soundcloud/search.py +++ b/bot/results/soundcloud/search.py @@ -1,23 +1,18 @@ -from aiogram.types import ( - InlineQueryResultDocument, InlineQueryResultCachedAudio -) +from aiogram.types import InlineQueryResultCachedAudio, InlineQueryResultDocument -from bot.modules.soundcloud import soundcloud from bot.modules.database import db from bot.modules.settings import UserSettings +from bot.modules.soundcloud import soundcloud from ..common.search import get_common_search_result -async def get_soundcloud_search_results(query: str, settings: UserSettings) -> list[ - InlineQueryResultDocument | InlineQueryResultCachedAudio -]: +async def get_soundcloud_search_results( + query: str, settings: UserSettings +) -> list[InlineQueryResultDocument | InlineQueryResultCachedAudio]: return [ await get_common_search_result( - audio=audio, - db_table=db.soundcloud, - service_id='sc', - settings=settings + audio=audio, db_table=db.soundcloud, service_id="sc", settings=settings ) for audio in await soundcloud.songs.search(query, limit=50) ] diff --git a/bot/results/spotify/__init__.py b/bot/results/spotify/__init__.py index eacb443..d6513be 100644 --- a/bot/results/spotify/__init__.py +++ b/bot/results/spotify/__init__.py @@ -1,6 +1,3 @@ from .search import get_spotify_search_results - -__all__ = [ - 'get_spotify_search_results' -] +__all__ = ["get_spotify_search_results"] diff --git a/bot/results/spotify/search.py b/bot/results/spotify/search.py index 9598bbd..e46dae6 100644 --- a/bot/results/spotify/search.py +++ b/bot/results/spotify/search.py @@ -1,23 +1,18 @@ -from aiogram.types import ( - InlineQueryResultDocument, InlineQueryResultCachedAudio -) +from aiogram.types import InlineQueryResultCachedAudio, InlineQueryResultDocument -from bot.modules.spotify import spotify from bot.modules.database import db from bot.modules.settings import UserSettings +from bot.modules.spotify import spotify from ..common.search import get_common_search_result -async def get_spotify_search_results(query: str, settings: UserSettings) -> list[ - InlineQueryResultDocument | InlineQueryResultCachedAudio -]: +async def get_spotify_search_results( + query: str, settings: UserSettings +) -> list[InlineQueryResultDocument | InlineQueryResultCachedAudio]: return [ await get_common_search_result( - audio=audio, - db_table=db.spotify, - service_id='spot', - settings=settings + audio=audio, db_table=db.spotify, service_id="spot", settings=settings ) for audio in spotify.songs.search(query, limit=50) ] diff --git a/bot/results/url/url.py b/bot/results/url/url.py index a4325dd..385b9a1 100644 --- a/bot/results/url/url.py +++ b/bot/results/url/url.py @@ -1,18 +1,16 @@ -from aiogram.types import ( - InlineQueryResultDocument, InlineQueryResultCachedAudio -) +import inspect + +from aiogram.types import InlineQueryResultCachedAudio, InlineQueryResultDocument -from bot.modules.url import recognise_music_service, get_id from bot.modules.settings import UserSettings +from bot.modules.url import get_id, recognise_music_service from ..common.search import get_common_search_result -import inspect - -async def get_url_results(query: str, settings: UserSettings) -> list[ - InlineQueryResultDocument | InlineQueryResultCachedAudio -]: +async def get_url_results( + query: str, settings: UserSettings +) -> list[InlineQueryResultDocument | InlineQueryResultCachedAudio]: service = recognise_music_service(query) if inspect.iscoroutinefunction(service.by_id_func): audio = await service.by_id_func(await get_id(service)) @@ -26,6 +24,6 @@ async def get_url_results(query: str, settings: UserSettings) -> list[ audio=audio, db_table=service.db_table, service_id=service.name, - settings=settings + settings=settings, ) ] diff --git a/bot/results/youtube/__init__.py b/bot/results/youtube/__init__.py index b95c9c6..f95951e 100644 --- a/bot/results/youtube/__init__.py +++ b/bot/results/youtube/__init__.py @@ -1,6 +1,3 @@ from .search import get_youtube_search_results - -__all__ = [ - 'get_youtube_search_results' -] +__all__ = ["get_youtube_search_results"] diff --git a/bot/results/youtube/search.py b/bot/results/youtube/search.py index d82e69b..4ea126f 100644 --- a/bot/results/youtube/search.py +++ b/bot/results/youtube/search.py @@ -1,23 +1,18 @@ -from aiogram.types import ( - InlineQueryResultDocument, InlineQueryResultCachedAudio -) +from aiogram.types import InlineQueryResultCachedAudio, InlineQueryResultDocument -from bot.modules.youtube import youtube from bot.modules.database import db from bot.modules.settings import UserSettings +from bot.modules.youtube import youtube from ..common.search import get_common_search_result -async def get_youtube_search_results(query: str, settings: UserSettings) -> list[ - InlineQueryResultDocument | InlineQueryResultCachedAudio -]: +async def get_youtube_search_results( + query: str, settings: UserSettings +) -> list[InlineQueryResultDocument | InlineQueryResultCachedAudio]: return [ await get_common_search_result( - audio=audio, - db_table=db.youtube, - service_id='yt', - settings=settings + audio=audio, db_table=db.youtube, service_id="yt", settings=settings ) for audio in youtube.songs.search(query, limit=40) ] diff --git a/bot/utils/config/__init__.py b/bot/utils/config/__init__.py index c7cb2eb..4efceb0 100644 --- a/bot/utils/config/__init__.py +++ b/bot/utils/config/__init__.py @@ -1,4 +1,3 @@ from ._config import Config - config = Config() diff --git a/bot/utils/config/_config.py b/bot/utils/config/_config.py index fd6bbe2..47f3f92 100644 --- a/bot/utils/config/_config.py +++ b/bot/utils/config/_config.py @@ -5,7 +5,7 @@ class Config(dict): def __init__(self, _config: dict = None): try: if _config is None: - config = tomllib.load(open('config.toml', 'rb')) + config = tomllib.load(open("config.toml", "rb")) super().__init__(**config) else: diff --git a/lib/ShazamIO/shazamio/__init__.py b/lib/ShazamIO/shazamio/__init__.py index 1d6b6aa..e3986f7 100644 --- a/lib/ShazamIO/shazamio/__init__.py +++ b/lib/ShazamIO/shazamio/__init__.py @@ -1,6 +1,6 @@ -from .serializers import Serialize from .api import Shazam from .converter import Geo from .enums import GenreMusic +from .serializers import Serialize __all__ = ("Serialize", "Shazam", "Geo", "GenreMusic") diff --git a/lib/ShazamIO/shazamio/algorithm.py b/lib/ShazamIO/shazamio/algorithm.py index e7ac8a0..54ef897 100644 --- a/lib/ShazamIO/shazamio/algorithm.py +++ b/lib/ShazamIO/shazamio/algorithm.py @@ -1,5 +1,6 @@ from copy import copy -from typing import List, Optional, Any +from typing import Any, List, Optional + import numpy as np from .enums import FrequencyBand @@ -40,7 +41,9 @@ class SignatureGenerator: # Used when processing input: - self.ring_buffer_of_samples: RingBuffer[int] = RingBuffer(buffer_size=2048, default_value=0) + self.ring_buffer_of_samples: RingBuffer[int] = RingBuffer( + buffer_size=2048, default_value=0 + ) self.fft_outputs: RingBuffer[List[float]] = RingBuffer( buffer_size=256, default_value=[0.0 * 1025] @@ -91,12 +94,15 @@ class SignatureGenerator: self.next_signature.number_samples / self.next_signature.sample_rate_hz < self.MAX_TIME_SECONDS or sum( - len(peaks) for peaks in self.next_signature.frequency_band_to_sound_peaks.values() + len(peaks) + for peaks in self.next_signature.frequency_band_to_sound_peaks.values() ) < self.MAX_PEAKS ): self.process_input( - self.input_pending_processing[self.samples_processed : self.samples_processed + 128] + self.input_pending_processing[ + self.samples_processed : self.samples_processed + 128 + ] ) self.samples_processed += 128 @@ -107,7 +113,9 @@ class SignatureGenerator: self.next_signature.number_samples = 0 self.next_signature.frequency_band_to_sound_peaks = {} - self.ring_buffer_of_samples: RingBuffer[int] = RingBuffer(buffer_size=2048, default_value=0) + self.ring_buffer_of_samples: RingBuffer[int] = RingBuffer( + buffer_size=2048, default_value=0 + ) self.fft_outputs: RingBuffer[List[float]] = RingBuffer( buffer_size=256, default_value=[0.0 * 1025] ) @@ -124,7 +132,9 @@ class SignatureGenerator: self.do_peak_spreading_and_recognition() def do_fft(self, batch_of_128_s16le_mono_samples): - type_ring = self.ring_buffer_of_samples.position + len(batch_of_128_s16le_mono_samples) + type_ring = self.ring_buffer_of_samples.position + len( + batch_of_128_s16le_mono_samples + ) self.ring_buffer_of_samples[ self.ring_buffer_of_samples.position : type_ring ] = batch_of_128_s16le_mono_samples @@ -159,10 +169,13 @@ class SignatureGenerator: temporary_array_1[1] = np.roll(temporary_array_1[1], -1) temporary_array_1[2] = np.roll(temporary_array_1[2], -2) - origin_last_fft_np = np.hstack([temporary_array_1.max(axis=0)[:-3], origin_last_fft[-3:]]) + origin_last_fft_np = np.hstack( + [temporary_array_1.max(axis=0)[:-3], origin_last_fft[-3:]] + ) i1, i2, i3 = [ - (self.spread_fft_output.position + former_fft_num) % self.spread_fft_output.buffer_size + (self.spread_fft_output.position + former_fft_num) + % self.spread_fft_output.buffer_size for former_fft_num in [-1, -3, -6] ] @@ -234,27 +247,38 @@ class SignatureGenerator: fft_number = self.spread_fft_output.num_written - 46 peak_magnitude = ( - np.log(max(1 / 64, fft_minus_46[bin_position])) * 1477.3 + 6144 + np.log(max(1 / 64, fft_minus_46[bin_position])) * 1477.3 + + 6144 ) peak_magnitude_before = ( - np.log(max(1 / 64, fft_minus_46[bin_position - 1])) * 1477.3 + 6144 + np.log(max(1 / 64, fft_minus_46[bin_position - 1])) * 1477.3 + + 6144 ) peak_magnitude_after = ( - np.log(max(1 / 64, fft_minus_46[bin_position + 1])) * 1477.3 + 6144 + np.log(max(1 / 64, fft_minus_46[bin_position + 1])) * 1477.3 + + 6144 ) peak_variation_1 = ( - peak_magnitude * 2 - peak_magnitude_before - peak_magnitude_after + peak_magnitude * 2 + - peak_magnitude_before + - peak_magnitude_after ) peak_variation_2 = ( - (peak_magnitude_after - peak_magnitude_before) * 32 / peak_variation_1 + (peak_magnitude_after - peak_magnitude_before) + * 32 + / peak_variation_1 ) - corrected_peak_frequency_bin = bin_position * 64 + peak_variation_2 + corrected_peak_frequency_bin = ( + bin_position * 64 + peak_variation_2 + ) assert peak_variation_1 > 0 - frequency_hz = corrected_peak_frequency_bin * (16000 / 2 / 1024 / 64) + frequency_hz = corrected_peak_frequency_bin * ( + 16000 / 2 / 1024 / 64 + ) if 250 < frequency_hz < 520: band = FrequencyBand.hz_250_520 @@ -267,7 +291,10 @@ class SignatureGenerator: else: continue - if band not in self.next_signature.frequency_band_to_sound_peaks: + if ( + band + not in self.next_signature.frequency_band_to_sound_peaks + ): self.next_signature.frequency_band_to_sound_peaks[band] = [] self.next_signature.frequency_band_to_sound_peaks[band].append( diff --git a/lib/ShazamIO/shazamio/api.py b/lib/ShazamIO/shazamio/api.py index e4b815c..39223f5 100644 --- a/lib/ShazamIO/shazamio/api.py +++ b/lib/ShazamIO/shazamio/api.py @@ -1,21 +1,17 @@ import pathlib -import uuid import time -from typing import Optional +import uuid +from typing import Any, Dict, Optional, Union from pydub import AudioSegment -from typing import Dict, Any, Union - -from .misc import Request -from .misc import ShazamUrl +from .converter import Converter, Geo +from .enums import GenreMusic +from .misc import Request, ShazamUrl from .schemas.artists import ArtistQuery from .signature import DecodedMessage -from .enums import GenreMusic -from .converter import Converter, Geo from .typehints import CountryCode -from .utils import ArtistQueryGenerator -from .utils import get_song +from .utils import ArtistQueryGenerator, get_song class Shazam(Converter, Geo, Request): @@ -27,7 +23,9 @@ class Shazam(Converter, Geo, Request): self.language = language self.endpoint_country = endpoint_country - async def top_world_tracks(self, limit: int = 200, offset: int = 0) -> Dict[str, Any]: + async def top_world_tracks( + self, limit: int = 200, offset: int = 0 + ) -> Dict[str, Any]: """ Search top world tracks @@ -292,7 +290,9 @@ class Shazam(Converter, Geo, Request): headers=self.headers(), ) - async def search_track(self, query: str, limit: int = 10, offset: int = 0) -> Dict[str, Any]: + async def search_track( + self, query: str, limit: int = 10, offset: int = 0 + ) -> Dict[str, Any]: """ Search all tracks by prefix :param query: Track full title or prefix title diff --git a/lib/ShazamIO/shazamio/client.py b/lib/ShazamIO/shazamio/client.py index 8862942..aa80990 100644 --- a/lib/ShazamIO/shazamio/client.py +++ b/lib/ShazamIO/shazamio/client.py @@ -1,5 +1,4 @@ import aiohttp - from shazamio.exceptions import BadMethod from shazamio.utils import validate_json diff --git a/lib/ShazamIO/shazamio/converter.py b/lib/ShazamIO/shazamio/converter.py index fa4ce5f..cbf9a68 100644 --- a/lib/ShazamIO/shazamio/converter.py +++ b/lib/ShazamIO/shazamio/converter.py @@ -60,5 +60,7 @@ class Converter: signature_generator.feed_input(audio.get_array_of_samples()) signature_generator.MAX_TIME_SECONDS = 12 if audio.duration_seconds > 12 * 3: - signature_generator.samples_processed += 16000 * (int(audio.duration_seconds / 2) - 6) + signature_generator.samples_processed += 16000 * ( + int(audio.duration_seconds / 2) - 6 + ) return signature_generator diff --git a/lib/ShazamIO/shazamio/factory_misc.py b/lib/ShazamIO/shazamio/factory_misc.py index b9d0076..2b94968 100644 --- a/lib/ShazamIO/shazamio/factory_misc.py +++ b/lib/ShazamIO/shazamio/factory_misc.py @@ -1,22 +1,19 @@ from dataclass_factory import Factory - from shazamio.factory import FactorySchemas -from shazamio.schemas.artists import ArtistInfo -from shazamio.schemas.artists import ArtistV3 +from shazamio.schemas.artists import ArtistInfo, ArtistV3 from shazamio.schemas.attributes import ArtistAttribute from shazamio.schemas.models import ( - SongSection, - VideoSection, - RelatedSection, - LyricsSection, - BeaconDataLyricsSection, ArtistSection, + BeaconDataLyricsSection, + LyricsSection, MatchModel, + RelatedSection, + ResponseTrack, + SongSection, + TrackInfo, + VideoSection, + YoutubeData, ) -from shazamio.schemas.models import TrackInfo -from shazamio.schemas.models import YoutubeData -from shazamio.schemas.models import ResponseTrack - FACTORY_TRACK = Factory( schemas={ diff --git a/lib/ShazamIO/shazamio/misc.py b/lib/ShazamIO/shazamio/misc.py index f88bb67..c1dc836 100644 --- a/lib/ShazamIO/shazamio/misc.py +++ b/lib/ShazamIO/shazamio/misc.py @@ -1,4 +1,5 @@ from random import choice + from shazamio.user_agent import USER_AGENTS @@ -47,9 +48,7 @@ class ShazamUrl: ) LISTENING_COUNTER = "https://www.shazam.com/services/count/v2/web/track/{}" - SEARCH_ARTIST_V2 = ( - "https://www.shazam.com/services/amapi/v1/catalog/{endpoint_country}/artists/{artist_id}" - ) + SEARCH_ARTIST_V2 = "https://www.shazam.com/services/amapi/v1/catalog/{endpoint_country}/artists/{artist_id}" class Request: diff --git a/lib/ShazamIO/shazamio/schemas/artist/views/full_albums.py b/lib/ShazamIO/shazamio/schemas/artist/views/full_albums.py index 3ee3278..b62651f 100644 --- a/lib/ShazamIO/shazamio/schemas/artist/views/full_albums.py +++ b/lib/ShazamIO/shazamio/schemas/artist/views/full_albums.py @@ -3,7 +3,6 @@ from __future__ import annotations from typing import List, Optional from pydantic import BaseModel, Field - from shazamio.schemas.attributes import AttributeName from shazamio.schemas.base import BaseDataModel from shazamio.schemas.photos import ImageModel diff --git a/lib/ShazamIO/shazamio/schemas/artist/views/last_release.py b/lib/ShazamIO/shazamio/schemas/artist/views/last_release.py index ce9518e..fc6e016 100644 --- a/lib/ShazamIO/shazamio/schemas/artist/views/last_release.py +++ b/lib/ShazamIO/shazamio/schemas/artist/views/last_release.py @@ -3,7 +3,6 @@ from __future__ import annotations from typing import Any, Dict, List, Optional from pydantic import BaseModel, Field - from shazamio.schemas.attributes import AttributeName from shazamio.schemas.base import BaseDataModel from shazamio.schemas.photos import ImageModel diff --git a/lib/ShazamIO/shazamio/schemas/artist/views/simular_artists.py b/lib/ShazamIO/shazamio/schemas/artist/views/simular_artists.py index 5147835..bf23579 100644 --- a/lib/ShazamIO/shazamio/schemas/artist/views/simular_artists.py +++ b/lib/ShazamIO/shazamio/schemas/artist/views/simular_artists.py @@ -3,10 +3,8 @@ from __future__ import annotations from typing import List, Optional from pydantic import BaseModel, Field - from shazamio.schemas.attributes import AttributeName -from shazamio.schemas.base import BaseHrefNextData -from shazamio.schemas.base import BaseIdTypeHref +from shazamio.schemas.base import BaseHrefNextData, BaseIdTypeHref from shazamio.schemas.photos import ImageModel diff --git a/lib/ShazamIO/shazamio/schemas/artist/views/top_music.py b/lib/ShazamIO/shazamio/schemas/artist/views/top_music.py index 8886311..20ccaae 100644 --- a/lib/ShazamIO/shazamio/schemas/artist/views/top_music.py +++ b/lib/ShazamIO/shazamio/schemas/artist/views/top_music.py @@ -3,7 +3,6 @@ from __future__ import annotations from typing import Any, Dict, List, Optional from pydantic import BaseModel, Field - from shazamio.schemas.attributes import AttributeName from shazamio.schemas.base import BaseDataModel from shazamio.schemas.photos import ImageModel diff --git a/lib/ShazamIO/shazamio/schemas/artist/views/top_song.py b/lib/ShazamIO/shazamio/schemas/artist/views/top_song.py index 8b0e135..a22ec60 100644 --- a/lib/ShazamIO/shazamio/schemas/artist/views/top_song.py +++ b/lib/ShazamIO/shazamio/schemas/artist/views/top_song.py @@ -3,7 +3,6 @@ from __future__ import annotations from typing import List, Optional from pydantic import BaseModel, Field - from shazamio.schemas.artist.views.top_music import PlayParams from shazamio.schemas.attributes import AttributeName from shazamio.schemas.base import BaseDataModel diff --git a/lib/ShazamIO/shazamio/schemas/artists.py b/lib/ShazamIO/shazamio/schemas/artists.py index 9105998..ab7a08d 100644 --- a/lib/ShazamIO/shazamio/schemas/artists.py +++ b/lib/ShazamIO/shazamio/schemas/artists.py @@ -1,20 +1,14 @@ -from dataclasses import dataclass -from dataclasses import field -from typing import List -from typing import Optional -from typing import Union - -from pydantic import BaseModel -from pydantic import Field +from dataclasses import dataclass, field +from typing import List, Optional, Union +from pydantic import BaseModel, Field from shazamio.schemas.artist.views.full_albums import FullAlbumsModel from shazamio.schemas.artist.views.last_release import LastReleaseModel from shazamio.schemas.artist.views.simular_artists import SimularArtist from shazamio.schemas.artist.views.top_music import TopMusicVideosView from shazamio.schemas.artist.views.top_song import TopSong from shazamio.schemas.attributes import ArtistAttribute -from shazamio.schemas.enums import ArtistExtend -from shazamio.schemas.enums import ArtistView +from shazamio.schemas.enums import ArtistExtend, ArtistView from shazamio.schemas.errors import ErrorModel @@ -80,7 +74,9 @@ class ArtistRelationships(BaseModel): class ArtistViews(BaseModel): - top_music_videos: Optional[TopMusicVideosView] = Field(None, alias="top-music-videos") + top_music_videos: Optional[TopMusicVideosView] = Field( + None, alias="top-music-videos" + ) simular_artists: Optional[SimularArtist] = Field(None, alias="similar-artists") latest_release: Optional[LastReleaseModel] = Field(None, alias="latest-release") full_albums: Optional[FullAlbumsModel] = Field(None, alias="full-albums") diff --git a/lib/ShazamIO/shazamio/schemas/attributes.py b/lib/ShazamIO/shazamio/schemas/attributes.py index c278cc6..55718f1 100644 --- a/lib/ShazamIO/shazamio/schemas/attributes.py +++ b/lib/ShazamIO/shazamio/schemas/attributes.py @@ -1,8 +1,6 @@ -from typing import List -from typing import Optional +from typing import List, Optional -from pydantic import BaseModel -from pydantic import Field +from pydantic import BaseModel, Field class AttributeName(BaseModel): diff --git a/lib/ShazamIO/shazamio/schemas/base.py b/lib/ShazamIO/shazamio/schemas/base.py index 7347a2b..13b6795 100644 --- a/lib/ShazamIO/shazamio/schemas/base.py +++ b/lib/ShazamIO/shazamio/schemas/base.py @@ -1,11 +1,8 @@ -from typing import Generic -from typing import Optional -from typing import TypeVar +from typing import Generic, Optional, TypeVar from pydantic import BaseModel from pydantic.generics import GenericModel - T = TypeVar("T", bound=BaseModel) diff --git a/lib/ShazamIO/shazamio/schemas/models.py b/lib/ShazamIO/shazamio/schemas/models.py index af8745b..2ed24ab 100644 --- a/lib/ShazamIO/shazamio/schemas/models.py +++ b/lib/ShazamIO/shazamio/schemas/models.py @@ -1,11 +1,6 @@ -from dataclasses import dataclass -from dataclasses import field -from typing import List -from typing import Optional -from typing import Union -from urllib.parse import urlencode -from urllib.parse import urlparse -from urllib.parse import urlunparse +from dataclasses import dataclass, field +from typing import List, Optional, Union +from urllib.parse import urlencode, urlparse, urlunparse from uuid import UUID diff --git a/lib/ShazamIO/shazamio/schemas/photos.py b/lib/ShazamIO/shazamio/schemas/photos.py index 94af3cd..5f331c9 100644 --- a/lib/ShazamIO/shazamio/schemas/photos.py +++ b/lib/ShazamIO/shazamio/schemas/photos.py @@ -1,7 +1,6 @@ from typing import Optional -from pydantic import BaseModel -from pydantic import Field +from pydantic import BaseModel, Field class ImageModel(BaseModel): diff --git a/lib/ShazamIO/shazamio/serializers.py b/lib/ShazamIO/shazamio/serializers.py index 094bcd9..c404113 100644 --- a/lib/ShazamIO/shazamio/serializers.py +++ b/lib/ShazamIO/shazamio/serializers.py @@ -1,13 +1,8 @@ from typing import Union -from shazamio.factory_misc import FACTORY_ARTIST -from shazamio.factory_misc import FACTORY_TRACK -from shazamio.schemas.artists import ArtistInfo -from shazamio.schemas.artists import ArtistResponse -from shazamio.schemas.artists import ArtistV2 -from shazamio.schemas.models import ResponseTrack -from shazamio.schemas.models import TrackInfo -from shazamio.schemas.models import YoutubeData +from shazamio.factory_misc import FACTORY_ARTIST, FACTORY_TRACK +from shazamio.schemas.artists import ArtistInfo, ArtistResponse, ArtistV2 +from shazamio.schemas.models import ResponseTrack, TrackInfo, YoutubeData class Serialize: diff --git a/lib/ShazamIO/shazamio/signature.py b/lib/ShazamIO/shazamio/signature.py index 225e80d..4d594fd 100644 --- a/lib/ShazamIO/shazamio/signature.py +++ b/lib/ShazamIO/shazamio/signature.py @@ -1,9 +1,10 @@ -from typing import Dict, List from base64 import b64decode, b64encode -from math import exp, sqrt from binascii import crc32 -from io import BytesIO from ctypes import * +from io import BytesIO +from math import exp, sqrt +from typing import Dict, List + from .enums import FrequencyBand, SampleRate DATA_URI_PREFIX = "data:audio/vnd.shazam.sig;base64," @@ -31,7 +32,7 @@ class RawSignatureHeader(LittleEndianStructure): # field above, # it can be inferred and subtracted so that we obtain the number of samples, # and from the number of samples and sample rate we can obtain the length of the recording - ("fixed_value", c_uint32) + ("fixed_value", c_uint32), # Calculated as ((15 << 19) + 0x40000) - 0x7c0000 or 00 00 7c 00 - seems pretty constant, # may be different in the "SigType.STREAMING" mode ] @@ -100,7 +101,9 @@ class DecodedMessage: assert crc32(check_summable_data) & 0xFFFFFFFF == header.crc32 assert header.magic2 == 0x94119C00 - self.sample_rate_hz = int(SampleRate(header.shifted_sample_rate_id >> 27).name.strip("_")) + self.sample_rate_hz = int( + SampleRate(header.shifted_sample_rate_id >> 27).name.strip("_") + ) self.number_samples = int( header.number_samples_plus_divided_sample_rate - self.sample_rate_hz * 0.24 @@ -145,13 +148,17 @@ class DecodedMessage: fft_pass_offset: int = raw_fft_pass[0] if fft_pass_offset == 0xFF: - fft_pass_number = int.from_bytes(frequency_peaks_buf.read(4), "little") + fft_pass_number = int.from_bytes( + frequency_peaks_buf.read(4), "little" + ) continue else: fft_pass_number += fft_pass_offset peak_magnitude = int.from_bytes(frequency_peaks_buf.read(2), "little") - corrected_peak_frequency_bin = int.from_bytes(frequency_peaks_buf.read(2), "little") + corrected_peak_frequency_bin = int.from_bytes( + frequency_peaks_buf.read(2), "little" + ) self.frequency_band_to_sound_peaks[frequency_band].append( FrequencyPeak( @@ -203,7 +210,9 @@ class DecodedMessage: header.magic1 = 0xCAFE2580 header.magic2 = 0x94119C00 - header.shifted_sample_rate_id = int(getattr(SampleRate, "_%s" % self.sample_rate_hz)) << 27 + header.shifted_sample_rate_id = ( + int(getattr(SampleRate, "_%s" % self.sample_rate_hz)) << 27 + ) header.fixed_value = (15 << 19) + 0x40000 header.number_samples_plus_divided_sample_rate = int( self.number_samples + self.sample_rate_hz * 0.24 @@ -211,7 +220,9 @@ class DecodedMessage: contents_buf = BytesIO() - for frequency_band, frequency_peaks in sorted(self.frequency_band_to_sound_peaks.items()): + for frequency_band, frequency_peaks in sorted( + self.frequency_band_to_sound_peaks.items() + ): peaks_buf = BytesIO() fft_pass_number = 0 @@ -225,13 +236,19 @@ class DecodedMessage: if frequency_peak.fft_pass_number - fft_pass_number >= 255: peaks_buf.write(b"\xff") - peaks_buf.write(frequency_peak.fft_pass_number.to_bytes(4, "little")) + peaks_buf.write( + frequency_peak.fft_pass_number.to_bytes(4, "little") + ) fft_pass_number = frequency_peak.fft_pass_number - peaks_buf.write(bytes([frequency_peak.fft_pass_number - fft_pass_number])) + peaks_buf.write( + bytes([frequency_peak.fft_pass_number - fft_pass_number]) + ) peaks_buf.write(frequency_peak.peak_magnitude.to_bytes(2, "little")) - peaks_buf.write(frequency_peak.corrected_peak_frequency_bin.to_bytes(2, "little")) + peaks_buf.write( + frequency_peak.corrected_peak_frequency_bin.to_bytes(2, "little") + ) fft_pass_number = frequency_peak.fft_pass_number @@ -245,7 +262,9 @@ class DecodedMessage: header.size_minus_header = len(contents_buf.getvalue()) + 8 buf = BytesIO() - buf.write(header) # We will rewrite it just after in order to include the final CRC-32 + buf.write( + header + ) # We will rewrite it just after in order to include the final CRC-32 buf.write((0x40000000).to_bytes(4, "little")) buf.write((len(contents_buf.getvalue()) + 8).to_bytes(4, "little")) diff --git a/lib/ShazamIO/shazamio/utils.py b/lib/ShazamIO/shazamio/utils.py index bfc5c79..d0d2898 100644 --- a/lib/ShazamIO/shazamio/utils.py +++ b/lib/ShazamIO/shazamio/utils.py @@ -1,16 +1,12 @@ import pathlib from enum import Enum from io import BytesIO -from typing import Dict -from typing import List -from typing import Optional -from typing import Union +from typing import Dict, List, Optional, Union import aiofiles import aiohttp from aiohttp import ContentTypeError from pydub import AudioSegment - from shazamio.exceptions import FailedDecodeJson from shazamio.schemas.artists import ArtistQuery diff --git a/lib/pytube/README.md b/lib/pytube/README.md deleted file mode 100644 index 6a6e3f2..0000000 --- a/lib/pytube/README.md +++ /dev/null @@ -1,93 +0,0 @@ -
-

- pytube logo -

-

- pypi - - -

-
- -### Actively soliciting contributors! - -Have ideas for how pytube can be improved? Feel free to open an issue or a pull request! - -# pytube - -*pytube* is a genuine, lightweight, dependency-free Python library (and command-line utility) for downloading YouTube videos. - -## Documentation - -Detailed documentation about the usage of the library can be found at [pytube.io](https://pytube.io). This is recommended for most cases. If you want to hastily download a single video, the [quick start](#Quickstart) guide below might be what you're looking for. - -## Description - -YouTube is the most popular video-sharing platform in the world and as a hacker, you may encounter a situation where you want to script something to download videos. For this, I present to you: *pytube*. - -*pytube* is a lightweight library written in Python. It has no third-party -dependencies and aims to be highly reliable. - -*pytube* also makes pipelining easy, allowing you to specify callback functions for different download events, such as ``on progress`` or ``on complete``. - -Furthermore, *pytube* includes a command-line utility, allowing you to download videos right from the terminal. - -## Features - -- Support for both progressive & DASH streams -- Support for downloading the complete playlist -- Easily register ``on_download_progress`` & ``on_download_complete`` callbacks -- Command-line interfaced included -- Caption track support -- Outputs caption tracks to .srt format (SubRip Subtitle) -- Ability to capture thumbnail URL -- Extensively documented source code -- No third-party dependencies - -## Quickstart - -This guide covers the most basic usage of the library. For more detailed information, please refer to [pytube.io](https://pytube.io). - -### Installation - -Pytube requires an installation of Python 3.6 or greater, as well as pip. (Pip is typically bundled with Python [installations](https://python.org/downloads).) - -To install from PyPI with pip: - -```bash -$ python -m pip install pytube -``` - -Sometimes, the PyPI release becomes slightly outdated. To install from the source with pip: - -```bash -$ python -m pip install git+https://github.com/pytube/pytube -``` - -### Using pytube in a Python script - -To download a video using the library in a script, you'll need to import the YouTube class from the library and pass an argument of the video URL. From there, you can access the streams and download them. - -```python - >>> from pytube import YouTube - >>> YouTube('https://youtu.be/2lAe1cqCOXo').streams.first().download() - >>> yt = YouTube('http://youtube.com/watch?v=2lAe1cqCOXo') - >>> yt.streams - ... .filter(progressive=True, file_extension='mp4') - ... .order_by('resolution') - ... .desc() - ... .first() - ... .download() -``` - -### Using the command-line interface - -Using the CLI is remarkably straightforward as well. To download a video at the highest progressive quality, you can use the following command: -```bash -$ pytube https://youtube.com/watch?v=2lAe1cqCOXo -``` - -You can also do the same for a playlist: -```bash -$ pytube https://www.youtube.com/playlist?list=PLS1QulWo1RIaJECMeUT4LFwJ-ghgoSH6n -``` diff --git a/lib/pytube/pyproject.toml b/lib/pytube/pyproject.toml deleted file mode 100644 index c831a56..0000000 --- a/lib/pytube/pyproject.toml +++ /dev/null @@ -1,17 +0,0 @@ -[tool.poetry] -name = "pytube" -version = "15.0.1" -description = "Python 3 library for downloading YouTube Videos." -authors = ["Ronnie Ghose", "Taylor Fox Dahlin", "Nick Ficano"] -license = "The Unlicense (Unlicense)" -keywords = ["youtube", "download", "video", "stream",] -readme = "README.md" -homepage = "https://pytube.io" -repository = "https://github.com/pytube/pytube" - -[tool.poetry.dependencies] -python = ">=3.7" - -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" diff --git a/lib/pytube/pytube/__init__.py b/lib/pytube/pytube/__init__.py deleted file mode 100644 index 4eaa1b2..0000000 --- a/lib/pytube/pytube/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# flake8: noqa: F401 -# noreorder -""" -Pytube: a very serious Python library for downloading YouTube Videos. -""" -__title__ = "pytube" -__author__ = "Ronnie Ghose, Taylor Fox Dahlin, Nick Ficano" -__license__ = "The Unlicense (Unlicense)" -__js__ = None -__js_url__ = None - -from pytube.version import __version__ -from pytube.streams import Stream -from pytube.captions import Caption -from pytube.query import CaptionQuery, StreamQuery -from pytube.__main__ import YouTube -from pytube.contrib.playlist import Playlist -from pytube.contrib.channel import Channel -from pytube.contrib.search import Search diff --git a/lib/pytube/pytube/__main__.py b/lib/pytube/pytube/__main__.py deleted file mode 100644 index 60451d3..0000000 --- a/lib/pytube/pytube/__main__.py +++ /dev/null @@ -1,479 +0,0 @@ -""" -This module implements the core developer interface for pytube. - -The problem domain of the :class:`YouTube class focuses almost -exclusively on the developer interface. Pytube offloads the heavy lifting to -smaller peripheral modules and functions. - -""" -import logging -from typing import Any, Callable, Dict, List, Optional - -import pytube -import pytube.exceptions as exceptions -from pytube import extract, request -from pytube import Stream, StreamQuery -from pytube.helpers import install_proxy -from pytube.innertube import InnerTube -from pytube.metadata import YouTubeMetadata -from pytube.monostate import Monostate - -logger = logging.getLogger(__name__) - - -class YouTube: - """Core developer interface for pytube.""" - - def __init__( - self, - url: str, - on_progress_callback: Optional[Callable[[Any, bytes, int], None]] = None, - on_complete_callback: Optional[Callable[[Any, Optional[str]], None]] = None, - proxies: Dict[str, str] = None, - use_oauth: bool = False, - allow_oauth_cache: bool = True - ): - """Construct a :class:`YouTube `. - - :param str url: - A valid YouTube watch URL. - :param func on_progress_callback: - (Optional) User defined callback function for stream download - progress events. - :param func on_complete_callback: - (Optional) User defined callback function for stream download - complete events. - :param dict proxies: - (Optional) A dict mapping protocol to proxy address which will be used by pytube. - :param bool use_oauth: - (Optional) Prompt the user to authenticate to YouTube. - If allow_oauth_cache is set to True, the user should only be prompted once. - :param bool allow_oauth_cache: - (Optional) Cache OAuth tokens locally on the machine. Defaults to True. - These tokens are only generated if use_oauth is set to True as well. - """ - self._js: Optional[str] = None # js fetched by js_url - self._js_url: Optional[str] = None # the url to the js, parsed from watch html - - self._vid_info: Optional[Dict] = None # content fetched from innertube/player - - self._watch_html: Optional[str] = None # the html of /watch?v= - self._embed_html: Optional[str] = None - self._player_config_args: Optional[Dict] = None # inline js in the html containing - self._age_restricted: Optional[bool] = None - - self._fmt_streams: Optional[List[Stream]] = None - - self._initial_data = None - self._metadata: Optional[YouTubeMetadata] = None - - # video_id part of /watch?v= - self.video_id = extract.video_id(url) - - self.watch_url = f"https://youtube.com/watch?v={self.video_id}" - self.embed_url = f"https://www.youtube.com/embed/{self.video_id}" - - # Shared between all instances of `Stream` (Borg pattern). - self.stream_monostate = Monostate( - on_progress=on_progress_callback, on_complete=on_complete_callback - ) - - if proxies: - install_proxy(proxies) - - self._author = None - self._title = None - self._publish_date = None - - self.use_oauth = use_oauth - self.allow_oauth_cache = allow_oauth_cache - - def __repr__(self): - return f'' - - def __eq__(self, o: object) -> bool: - # Compare types and urls, if they're same return true, else return false. - return type(o) == type(self) and o.watch_url == self.watch_url - - @property - def watch_html(self): - if self._watch_html: - return self._watch_html - self._watch_html = request.get(url=self.watch_url) - return self._watch_html - - @property - def embed_html(self): - if self._embed_html: - return self._embed_html - self._embed_html = request.get(url=self.embed_url) - return self._embed_html - - @property - def age_restricted(self): - if self._age_restricted: - return self._age_restricted - self._age_restricted = extract.is_age_restricted(self.watch_html) - return self._age_restricted - - @property - def js_url(self): - if self._js_url: - return self._js_url - - if self.age_restricted: - self._js_url = extract.js_url(self.embed_html) - else: - self._js_url = extract.js_url(self.watch_html) - - return self._js_url - - @property - def js(self): - if self._js: - return self._js - - # If the js_url doesn't match the cached url, fetch the new js and update - # the cache; otherwise, load the cache. - if pytube.__js_url__ != self.js_url: - self._js = request.get(self.js_url) - pytube.__js__ = self._js - pytube.__js_url__ = self.js_url - else: - self._js = pytube.__js__ - - return self._js - - @property - def initial_data(self): - if self._initial_data: - return self._initial_data - self._initial_data = extract.initial_data(self.watch_html) - return self._initial_data - - @property - def streaming_data(self): - """Return streamingData from video info.""" - if 'streamingData' in self.vid_info: - return self.vid_info['streamingData'] - else: - self.bypass_age_gate() - return self.vid_info['streamingData'] - - @property - def fmt_streams(self): - """Returns a list of streams if they have been initialized. - - If the streams have not been initialized, finds all relevant - streams and initializes them. - """ - self.check_availability() - if self._fmt_streams: - return self._fmt_streams - - self._fmt_streams = [] - - stream_manifest = extract.apply_descrambler(self.streaming_data) - - # If the cached js doesn't work, try fetching a new js file - # https://github.com/pytube/pytube/issues/1054 - try: - extract.apply_signature(stream_manifest, self.vid_info, self.js) - except exceptions.ExtractError: - # To force an update to the js file, we clear the cache and retry - self._js = None - self._js_url = None - pytube.__js__ = None - pytube.__js_url__ = None - extract.apply_signature(stream_manifest, self.vid_info, self.js) - - # build instances of :class:`Stream ` - # Initialize stream objects - for stream in stream_manifest: - video = Stream( - stream=stream, - monostate=self.stream_monostate, - ) - self._fmt_streams.append(video) - - self.stream_monostate.title = self.title - self.stream_monostate.duration = self.length - - return self._fmt_streams - - def check_availability(self): - """Check whether the video is available. - - Raises different exceptions based on why the video is unavailable, - otherwise does nothing. - """ - status, messages = extract.playability_status(self.watch_html) - - for reason in messages: - if status == 'UNPLAYABLE': - if reason == ( - 'Join this channel to get access to members-only content ' - 'like this video, and other exclusive perks.' - ): - raise exceptions.MembersOnly(video_id=self.video_id) - elif reason == 'This live stream recording is not available.': - raise exceptions.RecordingUnavailable(video_id=self.video_id) - else: - raise exceptions.VideoUnavailable(video_id=self.video_id) - elif status == 'LOGIN_REQUIRED': - if reason == ( - 'This is a private video. ' - 'Please sign in to verify that you may see it.' - ): - raise exceptions.VideoPrivate(video_id=self.video_id) - elif status == 'ERROR': - if reason == 'Video unavailable': - raise exceptions.VideoUnavailable(video_id=self.video_id) - elif status == 'LIVE_STREAM': - raise exceptions.LiveStreamError(video_id=self.video_id) - - @property - def vid_info(self): - """Parse the raw vid info and return the parsed result. - - :rtype: Dict[Any, Any] - """ - if self._vid_info: - return self._vid_info - - innertube = InnerTube(use_oauth=self.use_oauth, allow_cache=self.allow_oauth_cache) - - innertube_response = innertube.player(self.video_id) - self._vid_info = innertube_response - return self._vid_info - - def bypass_age_gate(self): - """Attempt to update the vid_info by bypassing the age gate.""" - innertube = InnerTube( - client='ANDROID_EMBED', - use_oauth=self.use_oauth, - allow_cache=self.allow_oauth_cache - ) - innertube_response = innertube.player(self.video_id) - - playability_status = innertube_response['playabilityStatus'].get('status', None) - - # If we still can't access the video, raise an exception - # (tier 3 age restriction) - if playability_status == 'UNPLAYABLE': - raise exceptions.AgeRestrictedError(self.video_id) - - self._vid_info = innertube_response - - @property - def caption_tracks(self) -> List[pytube.Caption]: - """Get a list of :class:`Caption `. - - :rtype: List[Caption] - """ - raw_tracks = ( - self.vid_info.get("captions", {}) - .get("playerCaptionsTracklistRenderer", {}) - .get("captionTracks", []) - ) - return [pytube.Caption(track) for track in raw_tracks] - - @property - def captions(self) -> pytube.CaptionQuery: - """Interface to query caption tracks. - - :rtype: :class:`CaptionQuery `. - """ - return pytube.CaptionQuery(self.caption_tracks) - - @property - def streams(self) -> StreamQuery: - """Interface to query both adaptive (DASH) and progressive streams. - - :rtype: :class:`StreamQuery `. - """ - self.check_availability() - return StreamQuery(self.fmt_streams) - - @property - def thumbnail_url(self) -> str: - """Get the thumbnail url image. - - :rtype: str - """ - thumbnail_details = ( - self.vid_info.get("videoDetails", {}) - .get("thumbnail", {}) - .get("thumbnails") - ) - if thumbnail_details: - thumbnail_details = thumbnail_details[-1] # last item has max size - return thumbnail_details["url"] - - return f"https://img.youtube.com/vi/{self.video_id}/maxresdefault.jpg" - - @property - def publish_date(self): - """Get the publish date. - - :rtype: datetime - """ - if self._publish_date: - return self._publish_date - self._publish_date = extract.publish_date(self.watch_html) - return self._publish_date - - @publish_date.setter - def publish_date(self, value): - """Sets the publish date.""" - self._publish_date = value - - @property - def title(self) -> str: - """Get the video title. - - :rtype: str - """ - if self._title: - return self._title - - try: - self._title = self.vid_info['videoDetails']['title'] - except KeyError: - # Check_availability will raise the correct exception in most cases - # if it doesn't, ask for a report. - self.check_availability() - raise exceptions.PytubeError( - ( - f'Exception while accessing title of {self.watch_url}. ' - 'Please file a bug report at https://github.com/pytube/pytube' - ) - ) - - return self._title - - @title.setter - def title(self, value): - """Sets the title value.""" - self._title = value - - @property - def description(self) -> str: - """Get the video description. - - :rtype: str - """ - return self.vid_info.get("videoDetails", {}).get("shortDescription") - - @property - def rating(self) -> float: - """Get the video average rating. - - :rtype: float - - """ - return self.vid_info.get("videoDetails", {}).get("averageRating") - - @property - def length(self) -> int: - """Get the video length in seconds. - - :rtype: int - """ - return int(self.vid_info.get('videoDetails', {}).get('lengthSeconds')) - - @property - def views(self) -> int: - """Get the number of the times the video has been viewed. - - :rtype: int - """ - return int(self.vid_info.get("videoDetails", {}).get("viewCount")) - - @property - def author(self) -> str: - """Get the video author. - :rtype: str - """ - if self._author: - return self._author - self._author = self.vid_info.get("videoDetails", {}).get( - "author", "unknown" - ) - return self._author - - @author.setter - def author(self, value): - """Set the video author.""" - self._author = value - - @property - def keywords(self) -> List[str]: - """Get the video keywords. - - :rtype: List[str] - """ - return self.vid_info.get('videoDetails', {}).get('keywords', []) - - @property - def channel_id(self) -> str: - """Get the video poster's channel id. - - :rtype: str - """ - return self.vid_info.get('videoDetails', {}).get('channelId', None) - - @property - def channel_url(self) -> str: - """Construct the channel url for the video's poster from the channel id. - - :rtype: str - """ - return f'https://www.youtube.com/channel/{self.channel_id}' - - @property - def metadata(self) -> Optional[YouTubeMetadata]: - """Get the metadata for the video. - - :rtype: YouTubeMetadata - """ - if self._metadata: - return self._metadata - else: - self._metadata = extract.metadata(self.initial_data) - return self._metadata - - def register_on_progress_callback(self, func: Callable[[Any, bytes, int], None]): - """Register a download progress callback function post initialization. - - :param callable func: - A callback function that takes ``stream``, ``chunk``, - and ``bytes_remaining`` as parameters. - - :rtype: None - - """ - self.stream_monostate.on_progress = func - - def register_on_complete_callback(self, func: Callable[[Any, Optional[str]], None]): - """Register a download complete callback function post initialization. - - :param callable func: - A callback function that takes ``stream`` and ``file_path``. - - :rtype: None - - """ - self.stream_monostate.on_complete = func - - @staticmethod - def from_id(video_id: str) -> "YouTube": - """Construct a :class:`YouTube ` object from a video id. - - :param str video_id: - The video id of the YouTube video. - - :rtype: :class:`YouTube ` - - """ - return YouTube(f"https://www.youtube.com/watch?v={video_id}") diff --git a/lib/pytube/pytube/captions.py b/lib/pytube/pytube/captions.py deleted file mode 100644 index fe84bec..0000000 --- a/lib/pytube/pytube/captions.py +++ /dev/null @@ -1,164 +0,0 @@ -import math -import os -import time -import json -import xml.etree.ElementTree as ElementTree -from html import unescape -from typing import Dict, Optional - -from pytube import request -from pytube.helpers import safe_filename, target_directory - - -class Caption: - """Container for caption tracks.""" - - def __init__(self, caption_track: Dict): - """Construct a :class:`Caption `. - - :param dict caption_track: - Caption track data extracted from ``watch_html``. - """ - self.url = caption_track.get("baseUrl") - - # Certain videos have runs instead of simpleText - # this handles that edge case - name_dict = caption_track['name'] - if 'simpleText' in name_dict: - self.name = name_dict['simpleText'] - else: - for el in name_dict['runs']: - if 'text' in el: - self.name = el['text'] - - # Use "vssId" instead of "languageCode", fix issue #779 - self.code = caption_track["vssId"] - # Remove preceding '.' for backwards compatibility, e.g.: - # English -> vssId: .en, languageCode: en - # English (auto-generated) -> vssId: a.en, languageCode: en - self.code = self.code.strip('.') - - @property - def xml_captions(self) -> str: - """Download the xml caption tracks.""" - return request.get(self.url) - - @property - def json_captions(self) -> dict: - """Download and parse the json caption tracks.""" - json_captions_url = self.url.replace('fmt=srv3','fmt=json3') - text = request.get(json_captions_url) - parsed = json.loads(text) - assert parsed['wireMagic'] == 'pb3', 'Unexpected captions format' - return parsed - - def generate_srt_captions(self) -> str: - """Generate "SubRip Subtitle" captions. - - Takes the xml captions from :meth:`~pytube.Caption.xml_captions` and - recompiles them into the "SubRip Subtitle" format. - """ - return self.xml_caption_to_srt(self.xml_captions) - - @staticmethod - def float_to_srt_time_format(d: float) -> str: - """Convert decimal durations into proper srt format. - - :rtype: str - :returns: - SubRip Subtitle (str) formatted time duration. - - float_to_srt_time_format(3.89) -> '00:00:03,890' - """ - fraction, whole = math.modf(d) - time_fmt = time.strftime("%H:%M:%S,", time.gmtime(whole)) - ms = f"{fraction:.3f}".replace("0.", "") - return time_fmt + ms - - def xml_caption_to_srt(self, xml_captions: str) -> str: - """Convert xml caption tracks to "SubRip Subtitle (srt)". - - :param str xml_captions: - XML formatted caption tracks. - """ - segments = [] - root = ElementTree.fromstring(xml_captions) - for i, child in enumerate(list(root)): - text = child.text or "" - caption = unescape(text.replace("\n", " ").replace(" ", " "),) - try: - duration = float(child.attrib["dur"]) - except KeyError: - duration = 0.0 - start = float(child.attrib["start"]) - end = start + duration - sequence_number = i + 1 # convert from 0-indexed to 1. - line = "{seq}\n{start} --> {end}\n{text}\n".format( - seq=sequence_number, - start=self.float_to_srt_time_format(start), - end=self.float_to_srt_time_format(end), - text=caption, - ) - segments.append(line) - return "\n".join(segments).strip() - - def download( - self, - title: str, - srt: bool = True, - output_path: Optional[str] = None, - filename_prefix: Optional[str] = None, - ) -> str: - """Write the media stream to disk. - - :param title: - Output filename (stem only) for writing media file. - If one is not specified, the default filename is used. - :type title: str - :param srt: - Set to True to download srt, false to download xml. Defaults to True. - :type srt bool - :param output_path: - (optional) Output path for writing media file. If one is not - specified, defaults to the current working directory. - :type output_path: str or None - :param filename_prefix: - (optional) A string that will be prepended to the filename. - For example a number in a playlist or the name of a series. - If one is not specified, nothing will be prepended - This is separate from filename so you can use the default - filename but still add a prefix. - :type filename_prefix: str or None - - :rtype: str - """ - if title.endswith(".srt") or title.endswith(".xml"): - filename = ".".join(title.split(".")[:-1]) - else: - filename = title - - if filename_prefix: - filename = f"{safe_filename(filename_prefix)}{filename}" - - filename = safe_filename(filename) - - filename += f" ({self.code})" - - if srt: - filename += ".srt" - else: - filename += ".xml" - - file_path = os.path.join(target_directory(output_path), filename) - - with open(file_path, "w", encoding="utf-8") as file_handle: - if srt: - file_handle.write(self.generate_srt_captions()) - else: - file_handle.write(self.xml_captions) - - return file_path - - def __repr__(self): - """Printable object representation.""" - return ''.format(s=self) diff --git a/lib/pytube/pytube/cipher.py b/lib/pytube/pytube/cipher.py deleted file mode 100644 index 77b0f2a..0000000 --- a/lib/pytube/pytube/cipher.py +++ /dev/null @@ -1,697 +0,0 @@ -""" -This module contains all logic necessary to decipher the signature. - -YouTube's strategy to restrict downloading videos is to send a ciphered version -of the signature to the client, along with the decryption algorithm obfuscated -in JavaScript. For the clients to play the videos, JavaScript must take the -ciphered version, cycle it through a series of "transform functions," and then -signs the media URL with the output. - -This module is responsible for (1) finding and extracting those "transform -functions" (2) maps them to Python equivalents and (3) taking the ciphered -signature and decoding it. - -""" -import logging -import re -from itertools import chain -from typing import Any, Callable, Dict, List, Optional, Tuple - -from pytube.exceptions import ExtractError, RegexMatchError -from pytube.helpers import cache, regex_search -from pytube.parser import find_object_from_startpoint, throttling_array_split - -logger = logging.getLogger(__name__) - - -class Cipher: - def __init__(self, js: str): - self.transform_plan: List[str] = get_transform_plan(js) - var_regex = re.compile(r"^\w+\W") - var_match = var_regex.search(self.transform_plan[0]) - if not var_match: - raise RegexMatchError( - caller="__init__", pattern=var_regex.pattern - ) - var = var_match.group(0)[:-1] - self.transform_map = get_transform_map(js, var) - self.js_func_patterns = [ - r"\w+\.(\w+)\(\w,(\d+)\)", - r"\w+\[(\"\w+\")\]\(\w,(\d+)\)" - ] - - self.throttling_plan = get_throttling_plan(js) - self.throttling_array = get_throttling_function_array(js) - - self.calculated_n = None - - def calculate_n(self, initial_n: list): - """Converts n to the correct value to prevent throttling.""" - if self.calculated_n: - return self.calculated_n - - # First, update all instances of 'b' with the list(initial_n) - for i in range(len(self.throttling_array)): - if self.throttling_array[i] == 'b': - self.throttling_array[i] = initial_n - - for step in self.throttling_plan: - curr_func = self.throttling_array[int(step[0])] - if not callable(curr_func): - logger.debug(f'{curr_func} is not callable.') - logger.debug(f'Throttling array:\n{self.throttling_array}\n') - raise ExtractError(f'{curr_func} is not callable.') - - first_arg = self.throttling_array[int(step[1])] - - if len(step) == 2: - curr_func(first_arg) - elif len(step) == 3: - second_arg = self.throttling_array[int(step[2])] - curr_func(first_arg, second_arg) - - self.calculated_n = ''.join(initial_n) - return self.calculated_n - - def get_signature(self, ciphered_signature: str) -> str: - """Decipher the signature. - - Taking the ciphered signature, applies the transform functions. - - :param str ciphered_signature: - The ciphered signature sent in the ``player_config``. - :rtype: str - :returns: - Decrypted signature required to download the media content. - """ - signature = list(ciphered_signature) - - for js_func in self.transform_plan: - name, argument = self.parse_function(js_func) # type: ignore - signature = self.transform_map[name](signature, argument) - logger.debug( - "applied transform function\n" - "output: %s\n" - "js_function: %s\n" - "argument: %d\n" - "function: %s", - "".join(signature), - name, - argument, - self.transform_map[name], - ) - - return "".join(signature) - - @cache - def parse_function(self, js_func: str) -> Tuple[str, int]: - """Parse the Javascript transform function. - - Break a JavaScript transform function down into a two element ``tuple`` - containing the function name and some integer-based argument. - - :param str js_func: - The JavaScript version of the transform function. - :rtype: tuple - :returns: - two element tuple containing the function name and an argument. - - **Example**: - - parse_function('DE.AJ(a,15)') - ('AJ', 15) - - """ - logger.debug("parsing transform function") - for pattern in self.js_func_patterns: - regex = re.compile(pattern) - parse_match = regex.search(js_func) - if parse_match: - fn_name, fn_arg = parse_match.groups() - return fn_name, int(fn_arg) - - raise RegexMatchError( - caller="parse_function", pattern="js_func_patterns" - ) - - -def get_initial_function_name(js: str) -> str: - """Extract the name of the function responsible for computing the signature. - :param str js: - The contents of the base.js asset file. - :rtype: str - :returns: - Function name from regex match - """ - - function_patterns = [ - r"\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(", # noqa: E501 - r"\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(", # noqa: E501 - r'(?:\b|[^a-zA-Z0-9$])(?P[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # noqa: E501 - r'(?P[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # noqa: E501 - r'(["\'])signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', - r"\.sig\|\|(?P[a-zA-Z0-9$]+)\(", - r"yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P[a-zA-Z0-9$]+)\(", # noqa: E501 - r"\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(", # noqa: E501 - r"\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(", # noqa: E501 - r"\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\(", # noqa: E501 - r"\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\(", # noqa: E501 - r"\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\(", # noqa: E501 - ] - logger.debug("finding initial function name") - for pattern in function_patterns: - regex = re.compile(pattern) - function_match = regex.search(js) - if function_match: - logger.debug("finished regex search, matched: %s", pattern) - return function_match.group(1) - - raise RegexMatchError( - caller="get_initial_function_name", pattern="multiple" - ) - - -def get_transform_plan(js: str) -> List[str]: - """Extract the "transform plan". - - The "transform plan" is the functions that the ciphered signature is - cycled through to obtain the actual signature. - - :param str js: - The contents of the base.js asset file. - - **Example**: - - ['DE.AJ(a,15)', - 'DE.VR(a,3)', - 'DE.AJ(a,51)', - 'DE.VR(a,3)', - 'DE.kT(a,51)', - 'DE.kT(a,8)', - 'DE.VR(a,3)', - 'DE.kT(a,21)'] - """ - name = re.escape(get_initial_function_name(js)) - pattern = r"%s=function\(\w\){[a-z=\.\(\"\)]*;(.*);(?:.+)}" % name - logger.debug("getting transform plan") - return regex_search(pattern, js, group=1).split(";") - - -def get_transform_object(js: str, var: str) -> List[str]: - """Extract the "transform object". - - The "transform object" contains the function definitions referenced in the - "transform plan". The ``var`` argument is the obfuscated variable name - which contains these functions, for example, given the function call - ``DE.AJ(a,15)`` returned by the transform plan, "DE" would be the var. - - :param str js: - The contents of the base.js asset file. - :param str var: - The obfuscated variable name that stores an object with all functions - that descrambles the signature. - - **Example**: - - >>> get_transform_object(js, 'DE') - ['AJ:function(a){a.reverse()}', - 'VR:function(a,b){a.splice(0,b)}', - 'kT:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c}'] - - """ - pattern = r"var %s={(.*?)};" % re.escape(var) - logger.debug("getting transform object") - regex = re.compile(pattern, flags=re.DOTALL) - transform_match = regex.search(js) - if not transform_match: - raise RegexMatchError(caller="get_transform_object", pattern=pattern) - - return transform_match.group(1).replace("\n", " ").split(", ") - - -def get_transform_map(js: str, var: str) -> Dict: - """Build a transform function lookup. - - Build a lookup table of obfuscated JavaScript function names to the - Python equivalents. - - :param str js: - The contents of the base.js asset file. - :param str var: - The obfuscated variable name that stores an object with all functions - that descrambles the signature. - - """ - transform_object = get_transform_object(js, var) - mapper = {} - for obj in transform_object: - # AJ:function(a){a.reverse()} => AJ, function(a){a.reverse()} - name, function = obj.split(":", 1) - fn = map_functions(function) - mapper[name] = fn - return mapper - - -def get_throttling_function_name(js: str) -> str: - """Extract the name of the function that computes the throttling parameter. - - :param str js: - The contents of the base.js asset file. - :rtype: str - :returns: - The name of the function used to compute the throttling parameter. - """ - function_patterns = [ - # https://github.com/ytdl-org/youtube-dl/issues/29326#issuecomment-865985377 - # https://github.com/yt-dlp/yt-dlp/commit/48416bc4a8f1d5ff07d5977659cb8ece7640dcd8 - # var Bpa = [iha]; - # ... - # a.C && (b = a.get("n")) && (b = Bpa[0](b), a.set("n", b), - # Bpa.length || iha("")) }}; - # In the above case, `iha` is the relevant function name - r'a\.[a-zA-Z]\s*&&\s*\([a-z]\s*=\s*a\.get\("n"\)\)\s*&&.*?\|\|\s*([a-z]+)', - r'\([a-z]\s*=\s*([a-zA-Z0-9$]+)(\[\d+\])\([a-z]\)', - ] - logger.debug('Finding throttling function name') - for pattern in function_patterns: - regex = re.compile(pattern) - function_match = regex.search(js) - if function_match: - logger.debug("finished regex search, matched: %s", pattern) - if len(function_match.groups()) == 1: - return function_match.group(1) - idx = function_match.group(2) - if idx: - idx = idx.strip("[]") - array = re.search( - r'var {nfunc}\s*=\s*(\[.+?\]);'.format( - nfunc=re.escape(function_match.group(1))), - js - ) - if array: - array = array.group(1).strip("[]").split(",") - array = [x.strip() for x in array] - return array[int(idx)] - - raise RegexMatchError( - caller="get_throttling_function_name", pattern="multiple" - ) - - -def get_throttling_function_code(js: str) -> str: - """Extract the raw code for the throttling function. - - :param str js: - The contents of the base.js asset file. - :rtype: str - :returns: - The name of the function used to compute the throttling parameter. - """ - # Begin by extracting the correct function name - name = re.escape(get_throttling_function_name(js)) - - # Identify where the function is defined - pattern_start = r"%s=function\(\w\)" % name - regex = re.compile(pattern_start) - match = regex.search(js) - - # Extract the code within curly braces for the function itself, and merge any split lines - code_lines_list = find_object_from_startpoint(js, match.span()[1]).split('\n') - joined_lines = "".join(code_lines_list) - - # Prepend function definition (e.g. `Dea=function(a)`) - return match.group(0) + joined_lines - - -def get_throttling_function_array(js: str) -> List[Any]: - """Extract the "c" array. - - :param str js: - The contents of the base.js asset file. - :returns: - The array of various integers, arrays, and functions. - """ - raw_code = get_throttling_function_code(js) - - array_start = r",c=\[" - array_regex = re.compile(array_start) - match = array_regex.search(raw_code) - - array_raw = find_object_from_startpoint(raw_code, match.span()[1] - 1) - str_array = throttling_array_split(array_raw) - - converted_array = [] - for el in str_array: - try: - converted_array.append(int(el)) - continue - except ValueError: - # Not an integer value. - pass - - if el == 'null': - converted_array.append(None) - continue - - if el.startswith('"') and el.endswith('"'): - # Convert e.g. '"abcdef"' to string without quotation marks, 'abcdef' - converted_array.append(el[1:-1]) - continue - - if el.startswith('function'): - mapper = ( - (r"{for\(\w=\(\w%\w\.length\+\w\.length\)%\w\.length;\w--;\)\w\.unshift\(\w.pop\(\)\)}", throttling_unshift), # noqa:E501 - (r"{\w\.reverse\(\)}", throttling_reverse), - (r"{\w\.push\(\w\)}", throttling_push), - (r";var\s\w=\w\[0\];\w\[0\]=\w\[\w\];\w\[\w\]=\w}", throttling_swap), - (r"case\s\d+", throttling_cipher_function), - (r"\w\.splice\(0,1,\w\.splice\(\w,1,\w\[0\]\)\[0\]\)", throttling_nested_splice), # noqa:E501 - (r";\w\.splice\(\w,1\)}", js_splice), - (r"\w\.splice\(-\w\)\.reverse\(\)\.forEach\(function\(\w\){\w\.unshift\(\w\)}\)", throttling_prepend), # noqa:E501 - (r"for\(var \w=\w\.length;\w;\)\w\.push\(\w\.splice\(--\w,1\)\[0\]\)}", throttling_reverse), # noqa:E501 - ) - - found = False - for pattern, fn in mapper: - if re.search(pattern, el): - converted_array.append(fn) - found = True - if found: - continue - - converted_array.append(el) - - # Replace null elements with array itself - for i in range(len(converted_array)): - if converted_array[i] is None: - converted_array[i] = converted_array - - return converted_array - - -def get_throttling_plan(js: str): - """Extract the "throttling plan". - - The "throttling plan" is a list of tuples used for calling functions - in the c array. The first element of the tuple is the index of the - function to call, and any remaining elements of the tuple are arguments - to pass to that function. - - :param str js: - The contents of the base.js asset file. - :returns: - The full function code for computing the throttlign parameter. - """ - raw_code = get_throttling_function_code(js) - - transform_start = r"try{" - plan_regex = re.compile(transform_start) - match = plan_regex.search(raw_code) - - transform_plan_raw = js - - # Steps are either c[x](c[y]) or c[x](c[y],c[z]) - step_start = r"c\[(\d+)\]\(c\[(\d+)\](,c(\[(\d+)\]))?\)" - step_regex = re.compile(step_start) - matches = step_regex.findall(transform_plan_raw) - transform_steps = [] - for match in matches: - if match[4] != '': - transform_steps.append((match[0],match[1],match[4])) - else: - transform_steps.append((match[0],match[1])) - - return transform_steps - - -def reverse(arr: List, _: Optional[Any]): - """Reverse elements in a list. - - This function is equivalent to: - - .. code-block:: javascript - - function(a, b) { a.reverse() } - - This method takes an unused ``b`` variable as their transform functions - universally sent two arguments. - - **Example**: - - >>> reverse([1, 2, 3, 4]) - [4, 3, 2, 1] - """ - return arr[::-1] - - -def splice(arr: List, b: int): - """Add/remove items to/from a list. - - This function is equivalent to: - - .. code-block:: javascript - - function(a, b) { a.splice(0, b) } - - **Example**: - - >>> splice([1, 2, 3, 4], 2) - [1, 2] - """ - return arr[b:] - - -def swap(arr: List, b: int): - """Swap positions at b modulus the list length. - - This function is equivalent to: - - .. code-block:: javascript - - function(a, b) { var c=a[0];a[0]=a[b%a.length];a[b]=c } - - **Example**: - - >>> swap([1, 2, 3, 4], 2) - [3, 2, 1, 4] - """ - r = b % len(arr) - return list(chain([arr[r]], arr[1:r], [arr[0]], arr[r + 1 :])) - - -def throttling_reverse(arr: list): - """Reverses the input list. - - Needs to do an in-place reversal so that the passed list gets changed. - To accomplish this, we create a reversed copy, and then change each - indvidual element. - """ - reverse_copy = arr.copy()[::-1] - for i in range(len(reverse_copy)): - arr[i] = reverse_copy[i] - - -def throttling_push(d: list, e: Any): - """Pushes an element onto a list.""" - d.append(e) - - -def throttling_mod_func(d: list, e: int): - """Perform the modular function from the throttling array functions. - - In the javascript, the modular operation is as follows: - e = (e % d.length + d.length) % d.length - - We simply translate this to python here. - """ - return (e % len(d) + len(d)) % len(d) - - -def throttling_unshift(d: list, e: int): - """Rotates the elements of the list to the right. - - In the javascript, the operation is as follows: - for(e=(e%d.length+d.length)%d.length;e--;)d.unshift(d.pop()) - """ - e = throttling_mod_func(d, e) - new_arr = d[-e:] + d[:-e] - d.clear() - for el in new_arr: - d.append(el) - - -def throttling_cipher_function(d: list, e: str): - """This ciphers d with e to generate a new list. - - In the javascript, the operation is as follows: - var h = [A-Za-z0-9-_], f = 96; // simplified from switch-case loop - d.forEach( - function(l,m,n){ - this.push( - n[m]=h[ - (h.indexOf(l)-h.indexOf(this[m])+m-32+f--)%h.length - ] - ) - }, - e.split("") - ) - """ - h = list('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_') - f = 96 - # by naming it "this" we can more closely reflect the js - this = list(e) - - # This is so we don't run into weirdness with enumerate while - # we change the input list - copied_list = d.copy() - - for m, l in enumerate(copied_list): - bracket_val = (h.index(l) - h.index(this[m]) + m - 32 + f) % len(h) - this.append( - h[bracket_val] - ) - d[m] = h[bracket_val] - f -= 1 - - -def throttling_nested_splice(d: list, e: int): - """Nested splice function in throttling js. - - In the javascript, the operation is as follows: - function(d,e){ - e=(e%d.length+d.length)%d.length; - d.splice( - 0, - 1, - d.splice( - e, - 1, - d[0] - )[0] - ) - } - - While testing, all this seemed to do is swap element 0 and e, - but the actual process is preserved in case there was an edge - case that was not considered. - """ - e = throttling_mod_func(d, e) - inner_splice = js_splice( - d, - e, - 1, - d[0] - ) - js_splice( - d, - 0, - 1, - inner_splice[0] - ) - - -def throttling_prepend(d: list, e: int): - """ - - In the javascript, the operation is as follows: - function(d,e){ - e=(e%d.length+d.length)%d.length; - d.splice(-e).reverse().forEach( - function(f){ - d.unshift(f) - } - ) - } - - Effectively, this moves the last e elements of d to the beginning. - """ - start_len = len(d) - # First, calculate e - e = throttling_mod_func(d, e) - - # Then do the prepending - new_arr = d[-e:] + d[:-e] - - # And update the input list - d.clear() - for el in new_arr: - d.append(el) - - end_len = len(d) - assert start_len == end_len - - -def throttling_swap(d: list, e: int): - """Swap positions of the 0'th and e'th elements in-place.""" - e = throttling_mod_func(d, e) - f = d[0] - d[0] = d[e] - d[e] = f - - -def js_splice(arr: list, start: int, delete_count=None, *items): - """Implementation of javascript's splice function. - - :param list arr: - Array to splice - :param int start: - Index at which to start changing the array - :param int delete_count: - Number of elements to delete from the array - :param *items: - Items to add to the array - - Reference: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/splice # noqa:E501 - """ - # Special conditions for start value - try: - if start > len(arr): - start = len(arr) - # If start is negative, count backwards from end - if start < 0: - start = len(arr) - start - except TypeError: - # Non-integer start values are treated as 0 in js - start = 0 - - # Special condition when delete_count is greater than remaining elements - if not delete_count or delete_count >= len(arr) - start: - delete_count = len(arr) - start # noqa: N806 - - deleted_elements = arr[start:start + delete_count] - - # Splice appropriately. - new_arr = arr[:start] + list(items) + arr[start + delete_count:] - - # Replace contents of input array - arr.clear() - for el in new_arr: - arr.append(el) - - return deleted_elements - - -def map_functions(js_func: str) -> Callable: - """For a given JavaScript transform function, return the Python equivalent. - - :param str js_func: - The JavaScript version of the transform function. - """ - mapper = ( - # function(a){a.reverse()} - (r"{\w\.reverse\(\)}", reverse), - # function(a,b){a.splice(0,b)} - (r"{\w\.splice\(0,\w\)}", splice), - # function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c} - (r"{var\s\w=\w\[0\];\w\[0\]=\w\[\w\%\w.length\];\w\[\w\]=\w}", swap), - # function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c} - ( - r"{var\s\w=\w\[0\];\w\[0\]=\w\[\w\%\w.length\];\w\[\w\%\w.length\]=\w}", - swap, - ), - ) - - for pattern, fn in mapper: - if re.search(pattern, js_func): - return fn - raise RegexMatchError(caller="map_functions", pattern="multiple") diff --git a/lib/pytube/pytube/cli.py b/lib/pytube/pytube/cli.py deleted file mode 100644 index c403497..0000000 --- a/lib/pytube/pytube/cli.py +++ /dev/null @@ -1,560 +0,0 @@ -#!/usr/bin/env python3 -"""A simple command line application to download youtube videos.""" -import argparse -import gzip -import json -import logging -import os -import shutil -import sys -import datetime as dt -import subprocess # nosec -from typing import List, Optional - -import pytube.exceptions as exceptions -from pytube import __version__ -from pytube import CaptionQuery, Playlist, Stream, YouTube -from pytube.helpers import safe_filename, setup_logger - - -logger = logging.getLogger(__name__) - - -def main(): - """Command line application to download youtube videos.""" - # noinspection PyTypeChecker - parser = argparse.ArgumentParser(description=main.__doc__) - args = _parse_args(parser) - if args.verbose: - log_filename = None - if args.logfile: - log_filename = args.logfile - setup_logger(logging.DEBUG, log_filename=log_filename) - logger.debug(f'Pytube version: {__version__}') - - if not args.url or "youtu" not in args.url: - parser.print_help() - sys.exit(1) - - if "/playlist" in args.url: - print("Loading playlist...") - playlist = Playlist(args.url) - if not args.target: - args.target = safe_filename(playlist.title) - for youtube_video in playlist.videos: - try: - _perform_args_on_youtube(youtube_video, args) - except exceptions.PytubeError as e: - print(f"There was an error with video: {youtube_video}") - print(e) - else: - print("Loading video...") - youtube = YouTube(args.url) - _perform_args_on_youtube(youtube, args) - - -def _perform_args_on_youtube( - youtube: YouTube, args: argparse.Namespace -) -> None: - if len(sys.argv) == 2 : # no arguments parsed - download_highest_resolution_progressive( - youtube=youtube, resolution="highest", target=args.target - ) - if args.list_captions: - _print_available_captions(youtube.captions) - if args.list: - display_streams(youtube) - if args.build_playback_report: - build_playback_report(youtube) - if args.itag: - download_by_itag(youtube=youtube, itag=args.itag, target=args.target) - if args.caption_code: - download_caption( - youtube=youtube, lang_code=args.caption_code, target=args.target - ) - if args.resolution: - download_by_resolution( - youtube=youtube, resolution=args.resolution, target=args.target - ) - if args.audio: - download_audio( - youtube=youtube, filetype=args.audio, target=args.target - ) - if args.ffmpeg: - ffmpeg_process( - youtube=youtube, resolution=args.ffmpeg, target=args.target - ) - - -def _parse_args( - parser: argparse.ArgumentParser, args: Optional[List] = None -) -> argparse.Namespace: - parser.add_argument( - "url", help="The YouTube /watch or /playlist url", nargs="?" - ) - parser.add_argument( - "--version", action="version", version="%(prog)s " + __version__, - ) - parser.add_argument( - "--itag", type=int, help="The itag for the desired stream", - ) - parser.add_argument( - "-r", - "--resolution", - type=str, - help="The resolution for the desired stream", - ) - parser.add_argument( - "-l", - "--list", - action="store_true", - help=( - "The list option causes pytube cli to return a list of streams " - "available to download" - ), - ) - parser.add_argument( - "-v", - "--verbose", - action="store_true", - dest="verbose", - help="Set logger output to verbose output.", - ) - parser.add_argument( - "--logfile", - action="store", - help="logging debug and error messages into a log file", - ) - parser.add_argument( - "--build-playback-report", - action="store_true", - help="Save the html and js to disk", - ) - parser.add_argument( - "-c", - "--caption-code", - type=str, - help=( - "Download srt captions for given language code. " - "Prints available language codes if no argument given" - ), - ) - parser.add_argument( - '-lc', - '--list-captions', - action='store_true', - help=( - "List available caption codes for a video" - ) - ) - parser.add_argument( - "-t", - "--target", - help=( - "The output directory for the downloaded stream. " - "Default is current working directory" - ), - ) - parser.add_argument( - "-a", - "--audio", - const="mp4", - nargs="?", - help=( - "Download the audio for a given URL at the highest bitrate available. " - "Defaults to mp4 format if none is specified" - ), - ) - parser.add_argument( - "-f", - "--ffmpeg", - const="best", - nargs="?", - help=( - "Downloads the audio and video stream for resolution provided. " - "If no resolution is provided, downloads the best resolution. " - "Runs the command line program ffmpeg to combine the audio and video" - ), - ) - - return parser.parse_args(args) - - -def build_playback_report(youtube: YouTube) -> None: - """Serialize the request data to json for offline debugging. - - :param YouTube youtube: - A YouTube object. - """ - ts = int(dt.datetime.utcnow().timestamp()) - fp = os.path.join(os.getcwd(), f"yt-video-{youtube.video_id}-{ts}.json.gz") - - js = youtube.js - watch_html = youtube.watch_html - vid_info = youtube.vid_info - - with gzip.open(fp, "wb") as fh: - fh.write( - json.dumps( - { - "url": youtube.watch_url, - "js": js, - "watch_html": watch_html, - "video_info": vid_info, - } - ).encode("utf8"), - ) - - -def display_progress_bar( - bytes_received: int, filesize: int, ch: str = "█", scale: float = 0.55 -) -> None: - """Display a simple, pretty progress bar. - - Example: - ~~~~~~~~ - PSY - GANGNAM STYLE(강남스타일) MV.mp4 - ↳ |███████████████████████████████████████| 100.0% - - :param int bytes_received: - The delta between the total file size (bytes) and bytes already - written to disk. - :param int filesize: - File size of the media stream in bytes. - :param str ch: - Character to use for presenting progress segment. - :param float scale: - Scale multiplier to reduce progress bar size. - - """ - columns = shutil.get_terminal_size().columns - max_width = int(columns * scale) - - filled = int(round(max_width * bytes_received / float(filesize))) - remaining = max_width - filled - progress_bar = ch * filled + " " * remaining - percent = round(100.0 * bytes_received / float(filesize), 1) - text = f" ↳ |{progress_bar}| {percent}%\r" - sys.stdout.write(text) - sys.stdout.flush() - - -# noinspection PyUnusedLocal -def on_progress( - stream: Stream, chunk: bytes, bytes_remaining: int -) -> None: # pylint: disable=W0613 - filesize = stream.filesize - bytes_received = filesize - bytes_remaining - display_progress_bar(bytes_received, filesize) - - -def _download( - stream: Stream, - target: Optional[str] = None, - filename: Optional[str] = None, -) -> None: - filesize_megabytes = stream.filesize // 1048576 - print(f"{filename or stream.default_filename} | {filesize_megabytes} MB") - file_path = stream.get_file_path(filename=filename, output_path=target) - if stream.exists_at_path(file_path): - print(f"Already downloaded at:\n{file_path}") - return - - stream.download(output_path=target, filename=filename) - sys.stdout.write("\n") - - -def _unique_name(base: str, subtype: str, media_type: str, target: str) -> str: - """ - Given a base name, the file format, and the target directory, will generate - a filename unique for that directory and file format. - :param str base: - The given base-name. - :param str subtype: - The filetype of the video which will be downloaded. - :param str media_type: - The media_type of the file, ie. "audio" or "video" - :param Path target: - Target directory for download. - """ - counter = 0 - while True: - file_name = f"{base}_{media_type}_{counter}" - file_path = os.path.join(target, f"{file_name}.{subtype}") - if not os.path.exists(file_path): - return file_name - counter += 1 - - -def ffmpeg_process( - youtube: YouTube, resolution: str, target: Optional[str] = None -) -> None: - """ - Decides the correct video stream to download, then calls _ffmpeg_downloader. - - :param YouTube youtube: - A valid YouTube object. - :param str resolution: - YouTube video resolution. - :param str target: - Target directory for download - """ - youtube.register_on_progress_callback(on_progress) - target = target or os.getcwd() - - if resolution == "best": - highest_quality_stream = ( - youtube.streams.filter(progressive=False) - .order_by("resolution") - .last() - ) - mp4_stream = ( - youtube.streams.filter(progressive=False, subtype="mp4") - .order_by("resolution") - .last() - ) - if highest_quality_stream.resolution == mp4_stream.resolution: - video_stream = mp4_stream - else: - video_stream = highest_quality_stream - else: - video_stream = youtube.streams.filter( - progressive=False, resolution=resolution, subtype="mp4" - ).first() - if not video_stream: - video_stream = youtube.streams.filter( - progressive=False, resolution=resolution - ).first() - if video_stream is None: - print(f"Could not find a stream with resolution: {resolution}") - print("Try one of these:") - display_streams(youtube) - sys.exit() - - audio_stream = youtube.streams.get_audio_only(video_stream.subtype) - if not audio_stream: - audio_stream = ( - youtube.streams.filter(only_audio=True).order_by("abr").last() - ) - if not audio_stream: - print("Could not find an audio only stream") - sys.exit() - _ffmpeg_downloader( - audio_stream=audio_stream, video_stream=video_stream, target=target - ) - - -def _ffmpeg_downloader( - audio_stream: Stream, video_stream: Stream, target: str -) -> None: - """ - Given a YouTube Stream object, finds the correct audio stream, downloads them both - giving them a unique name, them uses ffmpeg to create a new file with the audio - and video from the previously downloaded files. Then deletes the original adaptive - streams, leaving the combination. - - :param Stream audio_stream: - A valid Stream object representing the audio to download - :param Stream video_stream: - A valid Stream object representing the video to download - :param Path target: - A valid Path object - """ - video_unique_name = _unique_name( - safe_filename(video_stream.title), - video_stream.subtype, - "video", - target=target, - ) - audio_unique_name = _unique_name( - safe_filename(video_stream.title), - audio_stream.subtype, - "audio", - target=target, - ) - _download(stream=video_stream, target=target, filename=video_unique_name) - print("Loading audio...") - _download(stream=audio_stream, target=target, filename=audio_unique_name) - - video_path = os.path.join( - target, f"{video_unique_name}.{video_stream.subtype}" - ) - audio_path = os.path.join( - target, f"{audio_unique_name}.{audio_stream.subtype}" - ) - final_path = os.path.join( - target, f"{safe_filename(video_stream.title)}.{video_stream.subtype}" - ) - - subprocess.run( # nosec - [ - "ffmpeg", - "-i", - video_path, - "-i", - audio_path, - "-codec", - "copy", - final_path, - ] - ) - os.unlink(video_path) - os.unlink(audio_path) - - -def download_by_itag( - youtube: YouTube, itag: int, target: Optional[str] = None -) -> None: - """Start downloading a YouTube video. - - :param YouTube youtube: - A valid YouTube object. - :param int itag: - YouTube format identifier code. - :param str target: - Target directory for download - """ - stream = youtube.streams.get_by_itag(itag) - if stream is None: - print(f"Could not find a stream with itag: {itag}") - print("Try one of these:") - display_streams(youtube) - sys.exit() - - youtube.register_on_progress_callback(on_progress) - - try: - _download(stream, target=target) - except KeyboardInterrupt: - sys.exit() - - -def download_by_resolution( - youtube: YouTube, resolution: str, target: Optional[str] = None -) -> None: - """Start downloading a YouTube video. - - :param YouTube youtube: - A valid YouTube object. - :param str resolution: - YouTube video resolution. - :param str target: - Target directory for download - """ - # TODO(nficano): allow dash itags to be selected - stream = youtube.streams.get_by_resolution(resolution) - if stream is None: - print(f"Could not find a stream with resolution: {resolution}") - print("Try one of these:") - display_streams(youtube) - sys.exit() - - youtube.register_on_progress_callback(on_progress) - - try: - _download(stream, target=target) - except KeyboardInterrupt: - sys.exit() - - -def download_highest_resolution_progressive( - youtube: YouTube, resolution: str, target: Optional[str] = None -) -> None: - """Start downloading the highest resolution progressive stream. - - :param YouTube youtube: - A valid YouTube object. - :param str resolution: - YouTube video resolution. - :param str target: - Target directory for download - """ - youtube.register_on_progress_callback(on_progress) - try: - stream = youtube.streams.get_highest_resolution() - except exceptions.VideoUnavailable as err: - print(f"No video streams available: {err}") - else: - try: - _download(stream, target=target) - except KeyboardInterrupt: - sys.exit() - - -def display_streams(youtube: YouTube) -> None: - """Probe YouTube video and lists its available formats. - - :param YouTube youtube: - A valid YouTube watch URL. - - """ - for stream in youtube.streams: - print(stream) - - -def _print_available_captions(captions: CaptionQuery) -> None: - print( - f"Available caption codes are: {', '.join(c.code for c in captions)}" - ) - - -def download_caption( - youtube: YouTube, lang_code: Optional[str], target: Optional[str] = None -) -> None: - """Download a caption for the YouTube video. - - :param YouTube youtube: - A valid YouTube object. - :param str lang_code: - Language code desired for caption file. - Prints available codes if the value is None - or the desired code is not available. - :param str target: - Target directory for download - """ - try: - caption = youtube.captions[lang_code] - downloaded_path = caption.download( - title=youtube.title, output_path=target - ) - print(f"Saved caption file to: {downloaded_path}") - except KeyError: - print(f"Unable to find caption with code: {lang_code}") - _print_available_captions(youtube.captions) - - -def download_audio( - youtube: YouTube, filetype: str, target: Optional[str] = None -) -> None: - """ - Given a filetype, downloads the highest quality available audio stream for a - YouTube video. - - :param YouTube youtube: - A valid YouTube object. - :param str filetype: - Desired file format to download. - :param str target: - Target directory for download - """ - audio = ( - youtube.streams.filter(only_audio=True, subtype=filetype) - .order_by("abr") - .last() - ) - - if audio is None: - print("No audio only stream found. Try one of these:") - display_streams(youtube) - sys.exit() - - youtube.register_on_progress_callback(on_progress) - - try: - _download(audio, target=target) - except KeyboardInterrupt: - sys.exit() - - -if __name__ == "__main__": - main() diff --git a/lib/pytube/pytube/contrib/__init__.py b/lib/pytube/pytube/contrib/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/lib/pytube/pytube/contrib/channel.py b/lib/pytube/pytube/contrib/channel.py deleted file mode 100644 index 147ff7e..0000000 --- a/lib/pytube/pytube/contrib/channel.py +++ /dev/null @@ -1,201 +0,0 @@ -# -*- coding: utf-8 -*- -"""Module for interacting with a user's youtube channel.""" -import json -import logging -from typing import Dict, List, Optional, Tuple - -from pytube import extract, Playlist, request -from pytube.helpers import uniqueify - -logger = logging.getLogger(__name__) - - -class Channel(Playlist): - def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None): - """Construct a :class:`Channel `. - - :param str url: - A valid YouTube channel URL. - :param proxies: - (Optional) A dictionary of proxies to use for web requests. - """ - super().__init__(url, proxies) - - self.channel_uri = extract.channel_name(url) - - self.channel_url = ( - f"https://www.youtube.com{self.channel_uri}" - ) - - self.videos_url = self.channel_url + '/videos' - self.playlists_url = self.channel_url + '/playlists' - self.community_url = self.channel_url + '/community' - self.featured_channels_url = self.channel_url + '/channels' - self.about_url = self.channel_url + '/about' - - # Possible future additions - self._playlists_html = None - self._community_html = None - self._featured_channels_html = None - self._about_html = None - - @property - def channel_name(self): - """Get the name of the YouTube channel. - - :rtype: str - """ - return self.initial_data['metadata']['channelMetadataRenderer']['title'] - - @property - def channel_id(self): - """Get the ID of the YouTube channel. - - This will return the underlying ID, not the vanity URL. - - :rtype: str - """ - return self.initial_data['metadata']['channelMetadataRenderer']['externalId'] - - @property - def vanity_url(self): - """Get the vanity URL of the YouTube channel. - - Returns None if it doesn't exist. - - :rtype: str - """ - return self.initial_data['metadata']['channelMetadataRenderer'].get('vanityChannelUrl', None) # noqa:E501 - - @property - def html(self): - """Get the html for the /videos page. - - :rtype: str - """ - if self._html: - return self._html - self._html = request.get(self.videos_url) - return self._html - - @property - def playlists_html(self): - """Get the html for the /playlists page. - - Currently unused for any functionality. - - :rtype: str - """ - if self._playlists_html: - return self._playlists_html - else: - self._playlists_html = request.get(self.playlists_url) - return self._playlists_html - - @property - def community_html(self): - """Get the html for the /community page. - - Currently unused for any functionality. - - :rtype: str - """ - if self._community_html: - return self._community_html - else: - self._community_html = request.get(self.community_url) - return self._community_html - - @property - def featured_channels_html(self): - """Get the html for the /channels page. - - Currently unused for any functionality. - - :rtype: str - """ - if self._featured_channels_html: - return self._featured_channels_html - else: - self._featured_channels_html = request.get(self.featured_channels_url) - return self._featured_channels_html - - @property - def about_html(self): - """Get the html for the /about page. - - Currently unused for any functionality. - - :rtype: str - """ - if self._about_html: - return self._about_html - else: - self._about_html = request.get(self.about_url) - return self._about_html - - @staticmethod - def _extract_videos(raw_json: str) -> Tuple[List[str], Optional[str]]: - """Extracts videos from a raw json page - - :param str raw_json: Input json extracted from the page or the last - server response - :rtype: Tuple[List[str], Optional[str]] - :returns: Tuple containing a list of up to 100 video watch ids and - a continuation token, if more videos are available - """ - initial_data = json.loads(raw_json) - # this is the json tree structure, if the json was extracted from - # html - try: - videos = initial_data["contents"][ - "twoColumnBrowseResultsRenderer"][ - "tabs"][1]["tabRenderer"]["content"][ - "sectionListRenderer"]["contents"][0][ - "itemSectionRenderer"]["contents"][0][ - "gridRenderer"]["items"] - except (KeyError, IndexError, TypeError): - try: - # this is the json tree structure, if the json was directly sent - # by the server in a continuation response - important_content = initial_data[1]['response']['onResponseReceivedActions'][ - 0 - ]['appendContinuationItemsAction']['continuationItems'] - videos = important_content - except (KeyError, IndexError, TypeError): - try: - # this is the json tree structure, if the json was directly sent - # by the server in a continuation response - # no longer a list and no longer has the "response" key - important_content = initial_data['onResponseReceivedActions'][0][ - 'appendContinuationItemsAction']['continuationItems'] - videos = important_content - except (KeyError, IndexError, TypeError) as p: - logger.info(p) - return [], None - - try: - continuation = videos[-1]['continuationItemRenderer'][ - 'continuationEndpoint' - ]['continuationCommand']['token'] - videos = videos[:-1] - except (KeyError, IndexError): - # if there is an error, no continuation is available - continuation = None - - # remove duplicates - return ( - uniqueify( - list( - # only extract the video ids from the video data - map( - lambda x: ( - f"/watch?v=" - f"{x['gridVideoRenderer']['videoId']}" - ), - videos - ) - ), - ), - continuation, - ) diff --git a/lib/pytube/pytube/contrib/playlist.py b/lib/pytube/pytube/contrib/playlist.py deleted file mode 100644 index c55f5e9..0000000 --- a/lib/pytube/pytube/contrib/playlist.py +++ /dev/null @@ -1,419 +0,0 @@ -"""Module to download a complete playlist from a youtube channel.""" -import json -import logging -from collections.abc import Sequence -from datetime import date, datetime -from typing import Dict, Iterable, List, Optional, Tuple, Union - -from pytube import extract, request, YouTube -from pytube.helpers import cache, DeferredGeneratorList, install_proxy, uniqueify - -logger = logging.getLogger(__name__) - - -class Playlist(Sequence): - """Load a YouTube playlist with URL""" - - def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None): - if proxies: - install_proxy(proxies) - - self._input_url = url - - # These need to be initialized as None for the properties. - self._html = None - self._ytcfg = None - self._initial_data = None - self._sidebar_info = None - - self._playlist_id = None - - @property - def playlist_id(self): - """Get the playlist id. - - :rtype: str - """ - if self._playlist_id: - return self._playlist_id - self._playlist_id = extract.playlist_id(self._input_url) - return self._playlist_id - - @property - def playlist_url(self): - """Get the base playlist url. - - :rtype: str - """ - return f"https://www.youtube.com/playlist?list={self.playlist_id}" - - @property - def html(self): - """Get the playlist page html. - - :rtype: str - """ - if self._html: - return self._html - self._html = request.get(self.playlist_url) - return self._html - - @property - def ytcfg(self): - """Extract the ytcfg from the playlist page html. - - :rtype: dict - """ - if self._ytcfg: - return self._ytcfg - self._ytcfg = extract.get_ytcfg(self.html) - return self._ytcfg - - @property - def initial_data(self): - """Extract the initial data from the playlist page html. - - :rtype: dict - """ - if self._initial_data: - return self._initial_data - else: - self._initial_data = extract.initial_data(self.html) - return self._initial_data - - @property - def sidebar_info(self): - """Extract the sidebar info from the playlist page html. - - :rtype: dict - """ - if self._sidebar_info: - return self._sidebar_info - else: - self._sidebar_info = self.initial_data['sidebar'][ - 'playlistSidebarRenderer']['items'] - return self._sidebar_info - - @property - def yt_api_key(self): - """Extract the INNERTUBE_API_KEY from the playlist ytcfg. - - :rtype: str - """ - return self.ytcfg['INNERTUBE_API_KEY'] - - def _paginate( - self, until_watch_id: Optional[str] = None - ) -> Iterable[List[str]]: - """Parse the video links from the page source, yields the /watch?v= - part from video link - - :param until_watch_id Optional[str]: YouTube Video watch id until - which the playlist should be read. - - :rtype: Iterable[List[str]] - :returns: Iterable of lists of YouTube watch ids - """ - videos_urls, continuation = self._extract_videos( - json.dumps(extract.initial_data(self.html)) - ) - if until_watch_id: - try: - trim_index = videos_urls.index(f"/watch?v={until_watch_id}") - yield videos_urls[:trim_index] - return - except ValueError: - pass - yield videos_urls - - # Extraction from a playlist only returns 100 videos at a time - # if self._extract_videos returns a continuation there are more - # than 100 songs inside a playlist, so we need to add further requests - # to gather all of them - if continuation: - load_more_url, headers, data = self._build_continuation_url(continuation) - else: - load_more_url, headers, data = None, None, None - - while load_more_url and headers and data: # there is an url found - logger.debug("load more url: %s", load_more_url) - # requesting the next page of videos with the url generated from the - # previous page, needs to be a post - req = request.post(load_more_url, extra_headers=headers, data=data) - # extract up to 100 songs from the page loaded - # returns another continuation if more videos are available - videos_urls, continuation = self._extract_videos(req) - if until_watch_id: - try: - trim_index = videos_urls.index(f"/watch?v={until_watch_id}") - yield videos_urls[:trim_index] - return - except ValueError: - pass - yield videos_urls - - if continuation: - load_more_url, headers, data = self._build_continuation_url( - continuation - ) - else: - load_more_url, headers, data = None, None, None - - def _build_continuation_url(self, continuation: str) -> Tuple[str, dict, dict]: - """Helper method to build the url and headers required to request - the next page of videos - - :param str continuation: Continuation extracted from the json response - of the last page - :rtype: Tuple[str, dict, dict] - :returns: Tuple of an url and required headers for the next http - request - """ - return ( - ( - # was changed to this format (and post requests) - # between 2021.03.02 and 2021.03.03 - "https://www.youtube.com/youtubei/v1/browse?key=" - f"{self.yt_api_key}" - ), - { - "X-YouTube-Client-Name": "1", - "X-YouTube-Client-Version": "2.20200720.00.02", - }, - # extra data required for post request - { - "continuation": continuation, - "context": { - "client": { - "clientName": "WEB", - "clientVersion": "2.20200720.00.02" - } - } - } - ) - - @staticmethod - def _extract_videos(raw_json: str) -> Tuple[List[str], Optional[str]]: - """Extracts videos from a raw json page - - :param str raw_json: Input json extracted from the page or the last - server response - :rtype: Tuple[List[str], Optional[str]] - :returns: Tuple containing a list of up to 100 video watch ids and - a continuation token, if more videos are available - """ - initial_data = json.loads(raw_json) - try: - # this is the json tree structure, if the json was extracted from - # html - section_contents = initial_data["contents"][ - "twoColumnBrowseResultsRenderer"][ - "tabs"][0]["tabRenderer"]["content"][ - "sectionListRenderer"]["contents"] - try: - # Playlist without submenus - important_content = section_contents[ - 0]["itemSectionRenderer"][ - "contents"][0]["playlistVideoListRenderer"] - except (KeyError, IndexError, TypeError): - # Playlist with submenus - important_content = section_contents[ - 1]["itemSectionRenderer"][ - "contents"][0]["playlistVideoListRenderer"] - videos = important_content["contents"] - except (KeyError, IndexError, TypeError): - try: - # this is the json tree structure, if the json was directly sent - # by the server in a continuation response - # no longer a list and no longer has the "response" key - important_content = initial_data['onResponseReceivedActions'][0][ - 'appendContinuationItemsAction']['continuationItems'] - videos = important_content - except (KeyError, IndexError, TypeError) as p: - logger.info(p) - return [], None - - try: - continuation = videos[-1]['continuationItemRenderer'][ - 'continuationEndpoint' - ]['continuationCommand']['token'] - videos = videos[:-1] - except (KeyError, IndexError): - # if there is an error, no continuation is available - continuation = None - - # remove duplicates - return ( - uniqueify( - list( - # only extract the video ids from the video data - map( - lambda x: ( - f"/watch?v=" - f"{x['playlistVideoRenderer']['videoId']}" - ), - videos - ) - ), - ), - continuation, - ) - - def trimmed(self, video_id: str) -> Iterable[str]: - """Retrieve a list of YouTube video URLs trimmed at the given video ID - - i.e. if the playlist has video IDs 1,2,3,4 calling trimmed(3) returns - [1,2] - :type video_id: str - video ID to trim the returned list of playlist URLs at - :rtype: List[str] - :returns: - List of video URLs from the playlist trimmed at the given ID - """ - for page in self._paginate(until_watch_id=video_id): - yield from (self._video_url(watch_path) for watch_path in page) - - def url_generator(self): - """Generator that yields video URLs. - - :Yields: Video URLs - """ - for page in self._paginate(): - for video in page: - yield self._video_url(video) - - @property # type: ignore - @cache - def video_urls(self) -> DeferredGeneratorList: - """Complete links of all the videos in playlist - - :rtype: List[str] - :returns: List of video URLs - """ - return DeferredGeneratorList(self.url_generator()) - - def videos_generator(self): - for url in self.video_urls: - yield YouTube(url) - - @property - def videos(self) -> Iterable[YouTube]: - """Yields YouTube objects of videos in this playlist - - :rtype: List[YouTube] - :returns: List of YouTube - """ - return DeferredGeneratorList(self.videos_generator()) - - def __getitem__(self, i: Union[slice, int]) -> Union[str, List[str]]: - return self.video_urls[i] - - def __len__(self) -> int: - return len(self.video_urls) - - def __repr__(self) -> str: - return f"{repr(self.video_urls)}" - - @property - @cache - def last_updated(self) -> Optional[date]: - """Extract the date that the playlist was last updated. - - For some playlists, this will be a specific date, which is returned as a datetime - object. For other playlists, this is an estimate such as "1 week ago". Due to the - fact that this value is returned as a string, pytube does a best-effort parsing - where possible, and returns the raw string where it is not possible. - - :return: Date of last playlist update where possible, else the string provided - :rtype: datetime.date - """ - last_updated_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][ - 'stats'][2]['runs'][1]['text'] - try: - date_components = last_updated_text.split() - month = date_components[0] - day = date_components[1].strip(',') - year = date_components[2] - return datetime.strptime( - f"{month} {day:0>2} {year}", "%b %d %Y" - ).date() - except (IndexError, KeyError): - return last_updated_text - - @property - @cache - def title(self) -> Optional[str]: - """Extract playlist title - - :return: playlist title (name) - :rtype: Optional[str] - """ - return self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][ - 'title']['runs'][0]['text'] - - @property - def description(self) -> str: - return self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][ - 'description']['simpleText'] - - @property - def length(self): - """Extract the number of videos in the playlist. - - :return: Playlist video count - :rtype: int - """ - count_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][ - 'stats'][0]['runs'][0]['text'] - count_text = count_text.replace(',','') - return int(count_text) - - @property - def views(self): - """Extract view count for playlist. - - :return: Playlist view count - :rtype: int - """ - # "1,234,567 views" - views_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][ - 'stats'][1]['simpleText'] - # "1,234,567" - count_text = views_text.split()[0] - # "1234567" - count_text = count_text.replace(',', '') - return int(count_text) - - @property - def owner(self): - """Extract the owner of the playlist. - - :return: Playlist owner name. - :rtype: str - """ - return self.sidebar_info[1]['playlistSidebarSecondaryInfoRenderer'][ - 'videoOwner']['videoOwnerRenderer']['title']['runs'][0]['text'] - - @property - def owner_id(self): - """Extract the channel_id of the owner of the playlist. - - :return: Playlist owner's channel ID. - :rtype: str - """ - return self.sidebar_info[1]['playlistSidebarSecondaryInfoRenderer'][ - 'videoOwner']['videoOwnerRenderer']['title']['runs'][0][ - 'navigationEndpoint']['browseEndpoint']['browseId'] - - @property - def owner_url(self): - """Create the channel url of the owner of the playlist. - - :return: Playlist owner's channel url. - :rtype: str - """ - return f'https://www.youtube.com/channel/{self.owner_id}' - - @staticmethod - def _video_url(watch_path: str): - return f"https://www.youtube.com{watch_path}" diff --git a/lib/pytube/pytube/contrib/search.py b/lib/pytube/pytube/contrib/search.py deleted file mode 100644 index 87d60c5..0000000 --- a/lib/pytube/pytube/contrib/search.py +++ /dev/null @@ -1,225 +0,0 @@ -"""Module for interacting with YouTube search.""" -# Native python imports -import logging - -# Local imports -from pytube import YouTube -from pytube.innertube import InnerTube - - -logger = logging.getLogger(__name__) - - -class Search: - def __init__(self, query): - """Initialize Search object. - - :param str query: - Search query provided by the user. - """ - self.query = query - self._innertube_client = InnerTube(client='WEB') - - # The first search, without a continuation, is structured differently - # and contains completion suggestions, so we must store this separately - self._initial_results = None - - self._results = None - self._completion_suggestions = None - - # Used for keeping track of query continuations so that new results - # are always returned when get_next_results() is called - self._current_continuation = None - - @property - def completion_suggestions(self): - """Return query autocompletion suggestions for the query. - - :rtype: list - :returns: - A list of autocomplete suggestions provided by YouTube for the query. - """ - if self._completion_suggestions: - return self._completion_suggestions - if self.results: - self._completion_suggestions = self._initial_results['refinements'] - return self._completion_suggestions - - @property - def results(self): - """Return search results. - - On first call, will generate and return the first set of results. - Additional results can be generated using ``.get_next_results()``. - - :rtype: list - :returns: - A list of YouTube objects. - """ - if self._results: - return self._results - - videos, continuation = self.fetch_and_parse() - self._results = videos - self._current_continuation = continuation - return self._results - - def get_next_results(self): - """Use the stored continuation string to fetch the next set of results. - - This method does not return the results, but instead updates the results property. - """ - if self._current_continuation: - videos, continuation = self.fetch_and_parse(self._current_continuation) - self._results.extend(videos) - self._current_continuation = continuation - else: - raise IndexError - - def fetch_and_parse(self, continuation=None): - """Fetch from the innertube API and parse the results. - - :param str continuation: - Continuation string for fetching results. - :rtype: tuple - :returns: - A tuple of a list of YouTube objects and a continuation string. - """ - # Begin by executing the query and identifying the relevant sections - # of the results - raw_results = self.fetch_query(continuation) - - # Initial result is handled by try block, continuations by except block - try: - sections = raw_results['contents']['twoColumnSearchResultsRenderer'][ - 'primaryContents']['sectionListRenderer']['contents'] - except KeyError: - sections = raw_results['onResponseReceivedCommands'][0][ - 'appendContinuationItemsAction']['continuationItems'] - item_renderer = None - continuation_renderer = None - for s in sections: - if 'itemSectionRenderer' in s: - item_renderer = s['itemSectionRenderer'] - if 'continuationItemRenderer' in s: - continuation_renderer = s['continuationItemRenderer'] - - # If the continuationItemRenderer doesn't exist, assume no further results - if continuation_renderer: - next_continuation = continuation_renderer['continuationEndpoint'][ - 'continuationCommand']['token'] - else: - next_continuation = None - - # If the itemSectionRenderer doesn't exist, assume no results. - if item_renderer: - videos = [] - raw_video_list = item_renderer['contents'] - for video_details in raw_video_list: - # Skip over ads - if video_details.get('searchPyvRenderer', {}).get('ads', None): - continue - - # Skip "recommended" type videos e.g. "people also watched" and "popular X" - # that break up the search results - if 'shelfRenderer' in video_details: - continue - - # Skip auto-generated "mix" playlist results - if 'radioRenderer' in video_details: - continue - - # Skip playlist results - if 'playlistRenderer' in video_details: - continue - - # Skip channel results - if 'channelRenderer' in video_details: - continue - - # Skip 'people also searched for' results - if 'horizontalCardListRenderer' in video_details: - continue - - # Can't seem to reproduce, probably related to typo fix suggestions - if 'didYouMeanRenderer' in video_details: - continue - - # Seems to be the renderer used for the image shown on a no results page - if 'backgroundPromoRenderer' in video_details: - continue - - if 'videoRenderer' not in video_details: - logger.warning('Unexpected renderer encountered.') - logger.warning(f'Renderer name: {video_details.keys()}') - logger.warning(f'Search term: {self.query}') - logger.warning( - 'Please open an issue at ' - 'https://github.com/pytube/pytube/issues ' - 'and provide this log output.' - ) - continue - - # Extract relevant video information from the details. - # Some of this can be used to pre-populate attributes of the - # YouTube object. - vid_renderer = video_details['videoRenderer'] - vid_id = vid_renderer['videoId'] - vid_url = f'https://www.youtube.com/watch?v={vid_id}' - vid_title = vid_renderer['title']['runs'][0]['text'] - vid_channel_name = vid_renderer['ownerText']['runs'][0]['text'] - vid_channel_uri = vid_renderer['ownerText']['runs'][0][ - 'navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'] - # Livestreams have "runs", non-livestreams have "simpleText", - # and scheduled releases do not have 'viewCountText' - if 'viewCountText' in vid_renderer: - if 'runs' in vid_renderer['viewCountText']: - vid_view_count_text = vid_renderer['viewCountText']['runs'][0]['text'] - else: - vid_view_count_text = vid_renderer['viewCountText']['simpleText'] - # Strip ' views' text, then remove commas - stripped_text = vid_view_count_text.split()[0].replace(',','') - if stripped_text == 'No': - vid_view_count = 0 - else: - vid_view_count = int(stripped_text) - else: - vid_view_count = 0 - if 'lengthText' in vid_renderer: - vid_length = vid_renderer['lengthText']['simpleText'] - else: - vid_length = None - - vid_metadata = { - 'id': vid_id, - 'url': vid_url, - 'title': vid_title, - 'channel_name': vid_channel_name, - 'channel_url': vid_channel_uri, - 'view_count': vid_view_count, - 'length': vid_length - } - - # Construct YouTube object from metadata and append to results - vid = YouTube(vid_metadata['url']) - vid.author = vid_metadata['channel_name'] - vid.title = vid_metadata['title'] - videos.append(vid) - else: - videos = None - - return videos, next_continuation - - def fetch_query(self, continuation=None): - """Fetch raw results from the innertube API. - - :param str continuation: - Continuation string for fetching results. - :rtype: dict - :returns: - The raw json object returned by the innertube API. - """ - query_results = self._innertube_client.search(self.query, continuation) - if not self._initial_results: - self._initial_results = query_results - return query_results # noqa:R504 diff --git a/lib/pytube/pytube/exceptions.py b/lib/pytube/pytube/exceptions.py deleted file mode 100644 index ec44d2a..0000000 --- a/lib/pytube/pytube/exceptions.py +++ /dev/null @@ -1,145 +0,0 @@ -"""Library specific exception definitions.""" -from typing import Pattern, Union - - -class PytubeError(Exception): - """Base pytube exception that all others inherit. - - This is done to not pollute the built-in exceptions, which *could* result - in unintended errors being unexpectedly and incorrectly handled within - implementers code. - """ - - -class MaxRetriesExceeded(PytubeError): - """Maximum number of retries exceeded.""" - - -class HTMLParseError(PytubeError): - """HTML could not be parsed""" - - -class ExtractError(PytubeError): - """Data extraction based exception.""" - - -class RegexMatchError(ExtractError): - """Regex pattern did not return any matches.""" - - def __init__(self, caller: str, pattern: Union[str, Pattern]): - """ - :param str caller: - Calling function - :param str pattern: - Pattern that failed to match - """ - super().__init__(f"{caller}: could not find match for {pattern}") - self.caller = caller - self.pattern = pattern - - -class VideoUnavailable(PytubeError): - """Base video unavailable error.""" - def __init__(self, video_id: str): - """ - :param str video_id: - A YouTube video identifier. - """ - self.video_id = video_id - super().__init__(self.error_string) - - @property - def error_string(self): - return f'{self.video_id} is unavailable' - - -class AgeRestrictedError(VideoUnavailable): - """Video is age restricted, and cannot be accessed without OAuth.""" - def __init__(self, video_id: str): - """ - :param str video_id: - A YouTube video identifier. - """ - self.video_id = video_id - super().__init__(self.video_id) - - @property - def error_string(self): - return f"{self.video_id} is age restricted, and can't be accessed without logging in." - - -class LiveStreamError(VideoUnavailable): - """Video is a live stream.""" - def __init__(self, video_id: str): - """ - :param str video_id: - A YouTube video identifier. - """ - self.video_id = video_id - super().__init__(self.video_id) - - @property - def error_string(self): - return f'{self.video_id} is streaming live and cannot be loaded' - - -class VideoPrivate(VideoUnavailable): - def __init__(self, video_id: str): - """ - :param str video_id: - A YouTube video identifier. - """ - self.video_id = video_id - super().__init__(self.video_id) - - @property - def error_string(self): - return f'{self.video_id} is a private video' - - -class RecordingUnavailable(VideoUnavailable): - def __init__(self, video_id: str): - """ - :param str video_id: - A YouTube video identifier. - """ - self.video_id = video_id - super().__init__(self.video_id) - - @property - def error_string(self): - return f'{self.video_id} does not have a live stream recording available' - - -class MembersOnly(VideoUnavailable): - """Video is members-only. - - YouTube has special videos that are only viewable to users who have - subscribed to a content creator. - ref: https://support.google.com/youtube/answer/7544492?hl=en - """ - def __init__(self, video_id: str): - """ - :param str video_id: - A YouTube video identifier. - """ - self.video_id = video_id - super().__init__(self.video_id) - - @property - def error_string(self): - return f'{self.video_id} is a members-only video' - - -class VideoRegionBlocked(VideoUnavailable): - def __init__(self, video_id: str): - """ - :param str video_id: - A YouTube video identifier. - """ - self.video_id = video_id - super().__init__(self.video_id) - - @property - def error_string(self): - return f'{self.video_id} is not available in your region' diff --git a/lib/pytube/pytube/extract.py b/lib/pytube/pytube/extract.py deleted file mode 100644 index d083214..0000000 --- a/lib/pytube/pytube/extract.py +++ /dev/null @@ -1,579 +0,0 @@ -"""This module contains all non-cipher related data extraction logic.""" -import logging -import urllib.parse -import re -from collections import OrderedDict -from datetime import datetime -from typing import Any, Dict, List, Optional, Tuple -from urllib.parse import parse_qs, quote, urlencode, urlparse - -from pytube.cipher import Cipher -from pytube.exceptions import HTMLParseError, LiveStreamError, RegexMatchError -from pytube.helpers import regex_search -from pytube.metadata import YouTubeMetadata -from pytube.parser import parse_for_object, parse_for_all_objects - - -logger = logging.getLogger(__name__) - - -def publish_date(watch_html: str): - """Extract publish date - :param str watch_html: - The html contents of the watch page. - :rtype: str - :returns: - Publish date of the video. - """ - try: - result = regex_search( - r"(?<=itemprop=\"datePublished\" content=\")\d{4}-\d{2}-\d{2}", - watch_html, group=0 - ) - except RegexMatchError: - return None - return datetime.strptime(result, '%Y-%m-%d') - - -def recording_available(watch_html): - """Check if live stream recording is available. - - :param str watch_html: - The html contents of the watch page. - :rtype: bool - :returns: - Whether or not the content is private. - """ - unavailable_strings = [ - 'This live stream recording is not available.' - ] - for string in unavailable_strings: - if string in watch_html: - return False - return True - - -def is_private(watch_html): - """Check if content is private. - - :param str watch_html: - The html contents of the watch page. - :rtype: bool - :returns: - Whether or not the content is private. - """ - private_strings = [ - "This is a private video. Please sign in to verify that you may see it.", - "\"simpleText\":\"Private video\"", - "This video is private." - ] - for string in private_strings: - if string in watch_html: - return True - return False - - -def is_age_restricted(watch_html: str) -> bool: - """Check if content is age restricted. - - :param str watch_html: - The html contents of the watch page. - :rtype: bool - :returns: - Whether or not the content is age restricted. - """ - try: - regex_search(r"og:restrictions:age", watch_html, group=0) - except RegexMatchError: - return False - return True - - -def playability_status(watch_html: str) -> (str, str): - """Return the playability status and status explanation of a video. - - For example, a video may have a status of LOGIN_REQUIRED, and an explanation - of "This is a private video. Please sign in to verify that you may see it." - - This explanation is what gets incorporated into the media player overlay. - - :param str watch_html: - The html contents of the watch page. - :rtype: bool - :returns: - Playability status and reason of the video. - """ - player_response = initial_player_response(watch_html) - status_dict = player_response.get('playabilityStatus', {}) - if 'liveStreamability' in status_dict: - return 'LIVE_STREAM', 'Video is a live stream.' - if 'status' in status_dict: - if 'reason' in status_dict: - return status_dict['status'], [status_dict['reason']] - if 'messages' in status_dict: - return status_dict['status'], status_dict['messages'] - return None, [None] - - -def video_id(url: str) -> str: - """Extract the ``video_id`` from a YouTube url. - - This function supports the following patterns: - - - :samp:`https://youtube.com/watch?v={video_id}` - - :samp:`https://youtube.com/embed/{video_id}` - - :samp:`https://youtu.be/{video_id}` - - :param str url: - A YouTube url containing a video id. - :rtype: str - :returns: - YouTube video id. - """ - return regex_search(r"(?:v=|\/)([0-9A-Za-z_-]{11}).*", url, group=1) - - -def playlist_id(url: str) -> str: - """Extract the ``playlist_id`` from a YouTube url. - - This function supports the following patterns: - - - :samp:`https://youtube.com/playlist?list={playlist_id}` - - :samp:`https://youtube.com/watch?v={video_id}&list={playlist_id}` - - :param str url: - A YouTube url containing a playlist id. - :rtype: str - :returns: - YouTube playlist id. - """ - parsed = urllib.parse.urlparse(url) - return parse_qs(parsed.query)['list'][0] - - -def channel_name(url: str) -> str: - """Extract the ``channel_name`` or ``channel_id`` from a YouTube url. - - This function supports the following patterns: - - - :samp:`https://youtube.com/c/{channel_name}/*` - - :samp:`https://youtube.com/channel/{channel_id}/* - - :samp:`https://youtube.com/u/{channel_name}/*` - - :samp:`https://youtube.com/user/{channel_id}/* - - :param str url: - A YouTube url containing a channel name. - :rtype: str - :returns: - YouTube channel name. - """ - patterns = [ - r"(?:\/(c)\/([%\d\w_\-]+)(\/.*)?)", - r"(?:\/(channel)\/([%\w\d_\-]+)(\/.*)?)", - r"(?:\/(u)\/([%\d\w_\-]+)(\/.*)?)", - r"(?:\/(user)\/([%\w\d_\-]+)(\/.*)?)" - ] - for pattern in patterns: - regex = re.compile(pattern) - function_match = regex.search(url) - if function_match: - logger.debug("finished regex search, matched: %s", pattern) - uri_style = function_match.group(1) - uri_identifier = function_match.group(2) - return f'/{uri_style}/{uri_identifier}' - - raise RegexMatchError( - caller="channel_name", pattern="patterns" - ) - - -def video_info_url(video_id: str, watch_url: str) -> str: - """Construct the video_info url. - - :param str video_id: - A YouTube video identifier. - :param str watch_url: - A YouTube watch url. - :rtype: str - :returns: - :samp:`https://youtube.com/get_video_info` with necessary GET - parameters. - """ - params = OrderedDict( - [ - ("video_id", video_id), - ("ps", "default"), - ("eurl", quote(watch_url)), - ("hl", "en_US"), - ("html5", "1"), - ("c", "TVHTML5"), - ("cver", "7.20201028"), - ] - ) - return _video_info_url(params) - - -def video_info_url_age_restricted(video_id: str, embed_html: str) -> str: - """Construct the video_info url. - - :param str video_id: - A YouTube video identifier. - :param str embed_html: - The html contents of the embed page (for age restricted videos). - :rtype: str - :returns: - :samp:`https://youtube.com/get_video_info` with necessary GET - parameters. - """ - try: - sts = regex_search(r'"sts"\s*:\s*(\d+)', embed_html, group=1) - except RegexMatchError: - sts = "" - # Here we use ``OrderedDict`` so that the output is consistent between - # Python 2.7+. - eurl = f"https://youtube.googleapis.com/v/{video_id}" - params = OrderedDict( - [ - ("video_id", video_id), - ("eurl", eurl), - ("sts", sts), - ("html5", "1"), - ("c", "TVHTML5"), - ("cver", "7.20201028"), - ] - ) - return _video_info_url(params) - - -def _video_info_url(params: OrderedDict) -> str: - return "https://www.youtube.com/get_video_info?" + urlencode(params) - - -def js_url(html: str) -> str: - """Get the base JavaScript url. - - Construct the base JavaScript url, which contains the decipher - "transforms". - - :param str html: - The html contents of the watch page. - """ - try: - base_js = get_ytplayer_config(html)['assets']['js'] - except (KeyError, RegexMatchError): - base_js = get_ytplayer_js(html) - return "https://youtube.com" + base_js - - -def mime_type_codec(mime_type_codec: str) -> Tuple[str, List[str]]: - """Parse the type data. - - Breaks up the data in the ``type`` key of the manifest, which contains the - mime type and codecs serialized together, and splits them into separate - elements. - - **Example**: - - mime_type_codec('audio/webm; codecs="opus"') -> ('audio/webm', ['opus']) - - :param str mime_type_codec: - String containing mime type and codecs. - :rtype: tuple - :returns: - The mime type and a list of codecs. - - """ - pattern = r"(\w+\/\w+)\;\scodecs=\"([a-zA-Z-0-9.,\s]*)\"" - regex = re.compile(pattern) - results = regex.search(mime_type_codec) - if not results: - raise RegexMatchError(caller="mime_type_codec", pattern=pattern) - mime_type, codecs = results.groups() - return mime_type, [c.strip() for c in codecs.split(",")] - - -def get_ytplayer_js(html: str) -> Any: - """Get the YouTube player base JavaScript path. - - :param str html - The html contents of the watch page. - :rtype: str - :returns: - Path to YouTube's base.js file. - """ - js_url_patterns = [ - r"(/s/player/[\w\d]+/[\w\d_/.]+/base\.js)" - ] - for pattern in js_url_patterns: - regex = re.compile(pattern) - function_match = regex.search(html) - if function_match: - logger.debug("finished regex search, matched: %s", pattern) - yt_player_js = function_match.group(1) - return yt_player_js - - raise RegexMatchError( - caller="get_ytplayer_js", pattern="js_url_patterns" - ) - - -def get_ytplayer_config(html: str) -> Any: - """Get the YouTube player configuration data from the watch html. - - Extract the ``ytplayer_config``, which is json data embedded within the - watch html and serves as the primary source of obtaining the stream - manifest data. - - :param str html: - The html contents of the watch page. - :rtype: str - :returns: - Substring of the html containing the encoded manifest data. - """ - logger.debug("finding initial function name") - config_patterns = [ - r"ytplayer\.config\s*=\s*", - r"ytInitialPlayerResponse\s*=\s*" - ] - for pattern in config_patterns: - # Try each pattern consecutively if they don't find a match - try: - return parse_for_object(html, pattern) - except HTMLParseError as e: - logger.debug(f'Pattern failed: {pattern}') - logger.debug(e) - continue - - # setConfig() needs to be handled a little differently. - # We want to parse the entire argument to setConfig() - # and use then load that as json to find PLAYER_CONFIG - # inside of it. - setconfig_patterns = [ - r"yt\.setConfig\(.*['\"]PLAYER_CONFIG['\"]:\s*" - ] - for pattern in setconfig_patterns: - # Try each pattern consecutively if they don't find a match - try: - return parse_for_object(html, pattern) - except HTMLParseError: - continue - - raise RegexMatchError( - caller="get_ytplayer_config", pattern="config_patterns, setconfig_patterns" - ) - - -def get_ytcfg(html: str) -> str: - """Get the entirety of the ytcfg object. - - This is built over multiple pieces, so we have to find all matches and - combine the dicts together. - - :param str html: - The html contents of the watch page. - :rtype: str - :returns: - Substring of the html containing the encoded manifest data. - """ - ytcfg = {} - ytcfg_patterns = [ - r"ytcfg\s=\s", - r"ytcfg\.set\(" - ] - for pattern in ytcfg_patterns: - # Try each pattern consecutively and try to build a cohesive object - try: - found_objects = parse_for_all_objects(html, pattern) - for obj in found_objects: - ytcfg.update(obj) - except HTMLParseError: - continue - - if len(ytcfg) > 0: - return ytcfg - - raise RegexMatchError( - caller="get_ytcfg", pattern="ytcfg_pattenrs" - ) - - -def apply_signature(stream_manifest: Dict, vid_info: Dict, js: str) -> None: - """Apply the decrypted signature to the stream manifest. - - :param dict stream_manifest: - Details of the media streams available. - :param str js: - The contents of the base.js asset file. - - """ - cipher = Cipher(js=js) - - for i, stream in enumerate(stream_manifest): - try: - url: str = stream["url"] - except KeyError: - live_stream = ( - vid_info.get("playabilityStatus", {},) - .get("liveStreamability") - ) - if live_stream: - raise LiveStreamError("UNKNOWN") - # 403 Forbidden fix. - if "signature" in url or ( - "s" not in stream and ("&sig=" in url or "&lsig=" in url) - ): - # For certain videos, YouTube will just provide them pre-signed, in - # which case there's no real magic to download them and we can skip - # the whole signature descrambling entirely. - logger.debug("signature found, skip decipher") - continue - - signature = cipher.get_signature(ciphered_signature=stream["s"]) - - logger.debug( - "finished descrambling signature for itag=%s", stream["itag"] - ) - parsed_url = urlparse(url) - - # Convert query params off url to dict - query_params = parse_qs(urlparse(url).query) - query_params = { - k: v[0] for k,v in query_params.items() - } - query_params['sig'] = signature - if 'ratebypass' not in query_params.keys(): - # Cipher n to get the updated value - - initial_n = list(query_params['n']) - new_n = cipher.calculate_n(initial_n) - query_params['n'] = new_n - - url = f'{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}?{urlencode(query_params)}' # noqa:E501 - - # 403 forbidden fix - stream_manifest[i]["url"] = url - - -def apply_descrambler(stream_data: Dict) -> None: - """Apply various in-place transforms to YouTube's media stream data. - - Creates a ``list`` of dictionaries by string splitting on commas, then - taking each list item, parsing it as a query string, converting it to a - ``dict`` and unquoting the value. - - :param dict stream_data: - Dictionary containing query string encoded values. - - **Example**: - - >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'} - >>> apply_descrambler(d, 'foo') - >>> print(d) - {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]} - - """ - if 'url' in stream_data: - return None - - # Merge formats and adaptiveFormats into a single list - formats = [] - if 'formats' in stream_data.keys(): - formats.extend(stream_data['formats']) - if 'adaptiveFormats' in stream_data.keys(): - formats.extend(stream_data['adaptiveFormats']) - - # Extract url and s from signatureCiphers as necessary - for data in formats: - if 'url' not in data: - if 'signatureCipher' in data: - cipher_url = parse_qs(data['signatureCipher']) - data['url'] = cipher_url['url'][0] - data['s'] = cipher_url['s'][0] - data['is_otf'] = data.get('type') == 'FORMAT_STREAM_TYPE_OTF' - - logger.debug("applying descrambler") - return formats - - -def initial_data(watch_html: str) -> str: - """Extract the ytInitialData json from the watch_html page. - - This mostly contains metadata necessary for rendering the page on-load, - such as video information, copyright notices, etc. - - @param watch_html: Html of the watch page - @return: - """ - patterns = [ - r"window\[['\"]ytInitialData['\"]]\s*=\s*", - r"ytInitialData\s*=\s*" - ] - for pattern in patterns: - try: - return parse_for_object(watch_html, pattern) - except HTMLParseError: - pass - - raise RegexMatchError(caller='initial_data', pattern='initial_data_pattern') - - -def initial_player_response(watch_html: str) -> str: - """Extract the ytInitialPlayerResponse json from the watch_html page. - - This mostly contains metadata necessary for rendering the page on-load, - such as video information, copyright notices, etc. - - @param watch_html: Html of the watch page - @return: - """ - patterns = [ - r"window\[['\"]ytInitialPlayerResponse['\"]]\s*=\s*", - r"ytInitialPlayerResponse\s*=\s*" - ] - for pattern in patterns: - try: - return parse_for_object(watch_html, pattern) - except HTMLParseError: - pass - - raise RegexMatchError( - caller='initial_player_response', - pattern='initial_player_response_pattern' - ) - - -def metadata(initial_data) -> Optional[YouTubeMetadata]: - """Get the informational metadata for the video. - - e.g.: - [ - { - 'Song': '강남스타일(Gangnam Style)', - 'Artist': 'PSY', - 'Album': 'PSY SIX RULES Pt.1', - 'Licensed to YouTube by': 'YG Entertainment Inc. [...]' - } - ] - - :rtype: YouTubeMetadata - """ - try: - metadata_rows: List = initial_data["contents"]["twoColumnWatchNextResults"][ - "results"]["results"]["contents"][1]["videoSecondaryInfoRenderer"][ - "metadataRowContainer"]["metadataRowContainerRenderer"]["rows"] - except (KeyError, IndexError): - # If there's an exception accessing this data, it probably doesn't exist. - return YouTubeMetadata([]) - - # Rows appear to only have "metadataRowRenderer" or "metadataRowHeaderRenderer" - # and we only care about the former, so we filter the others - metadata_rows = filter( - lambda x: "metadataRowRenderer" in x.keys(), - metadata_rows - ) - - # We then access the metadataRowRenderer key in each element - # and build a metadata object from this new list - metadata_rows = [x["metadataRowRenderer"] for x in metadata_rows] - - return YouTubeMetadata(metadata_rows) diff --git a/lib/pytube/pytube/helpers.py b/lib/pytube/pytube/helpers.py deleted file mode 100644 index 4cf02eb..0000000 --- a/lib/pytube/pytube/helpers.py +++ /dev/null @@ -1,335 +0,0 @@ -"""Various helper functions implemented by pytube.""" -import functools -import gzip -import json -import logging -import os -import re -import warnings -from typing import Any, Callable, Dict, List, Optional, TypeVar -from urllib import request - -from pytube.exceptions import RegexMatchError - -logger = logging.getLogger(__name__) - - -class DeferredGeneratorList: - """A wrapper class for deferring list generation. - - Pytube has some continuation generators that create web calls, which means - that any time a full list is requested, all of those web calls must be - made at once, which could lead to slowdowns. This will allow individual - elements to be queried, so that slowdowns only happen as necessary. For - example, you can iterate over elements in the list without accessing them - all simultaneously. This should allow for speed improvements for playlist - and channel interactions. - """ - def __init__(self, generator): - """Construct a :class:`DeferredGeneratorList `. - - :param generator generator: - The deferrable generator to create a wrapper for. - :param func func: - (Optional) A function to call on the generator items to produce the list. - """ - self.gen = generator - self._elements = [] - - def __eq__(self, other): - """We want to mimic list behavior for comparison.""" - return list(self) == other - - def __getitem__(self, key) -> Any: - """Only generate items as they're asked for.""" - # We only allow querying with indexes. - if not isinstance(key, (int, slice)): - raise TypeError('Key must be either a slice or int.') - - # Convert int keys to slice - key_slice = key - if isinstance(key, int): - key_slice = slice(key, key + 1, 1) - - # Generate all elements up to the final item - while len(self._elements) < key_slice.stop: - try: - next_item = next(self.gen) - except StopIteration: - # If we can't find enough elements for the slice, raise an IndexError - raise IndexError - else: - self._elements.append(next_item) - - return self._elements[key] - - def __iter__(self): - """Custom iterator for dynamically generated list.""" - iter_index = 0 - while True: - try: - curr_item = self[iter_index] - except IndexError: - return - else: - yield curr_item - iter_index += 1 - - def __next__(self) -> Any: - """Fetch next element in iterator.""" - try: - curr_element = self[self.iter_index] - except IndexError: - raise StopIteration - self.iter_index += 1 - return curr_element # noqa:R504 - - def __len__(self) -> int: - """Return length of list of all items.""" - self.generate_all() - return len(self._elements) - - def __repr__(self) -> str: - """String representation of all items.""" - self.generate_all() - return str(self._elements) - - def __reversed__(self): - self.generate_all() - return self._elements[::-1] - - def generate_all(self): - """Generate all items.""" - while True: - try: - next_item = next(self.gen) - except StopIteration: - break - else: - self._elements.append(next_item) - - -def regex_search(pattern: str, string: str, group: int) -> str: - """Shortcut method to search a string for a given pattern. - - :param str pattern: - A regular expression pattern. - :param str string: - A target string to search. - :param int group: - Index of group to return. - :rtype: - str or tuple - :returns: - Substring pattern matches. - """ - regex = re.compile(pattern) - results = regex.search(string) - if not results: - raise RegexMatchError(caller="regex_search", pattern=pattern) - - logger.debug("matched regex search: %s", pattern) - - return results.group(group) - - -def safe_filename(s: str, max_length: int = 255) -> str: - """Sanitize a string making it safe to use as a filename. - - This function was based off the limitations outlined here: - https://en.wikipedia.org/wiki/Filename. - - :param str s: - A string to make safe for use as a file name. - :param int max_length: - The maximum filename character length. - :rtype: str - :returns: - A sanitized string. - """ - # Characters in range 0-31 (0x00-0x1F) are not allowed in ntfs filenames. - ntfs_characters = [chr(i) for i in range(0, 31)] - characters = [ - r'"', - r"\#", - r"\$", - r"\%", - r"'", - r"\*", - r"\,", - r"\.", - r"\/", - r"\:", - r'"', - r"\;", - r"\<", - r"\>", - r"\?", - r"\\", - r"\^", - r"\|", - r"\~", - r"\\\\", - ] - pattern = "|".join(ntfs_characters + characters) - regex = re.compile(pattern, re.UNICODE) - filename = regex.sub("", s) - return filename[:max_length].rsplit(" ", 0)[0] - - -def setup_logger(level: int = logging.ERROR, log_filename: Optional[str] = None) -> None: - """Create a configured instance of logger. - - :param int level: - Describe the severity level of the logs to handle. - """ - fmt = "[%(asctime)s] %(levelname)s in %(module)s: %(message)s" - date_fmt = "%H:%M:%S" - formatter = logging.Formatter(fmt, datefmt=date_fmt) - - # https://github.com/pytube/pytube/issues/163 - logger = logging.getLogger("pytube") - logger.setLevel(level) - - stream_handler = logging.StreamHandler() - stream_handler.setFormatter(formatter) - logger.addHandler(stream_handler) - - if log_filename is not None: - file_handler = logging.FileHandler(log_filename) - file_handler.setFormatter(formatter) - logger.addHandler(file_handler) - - -GenericType = TypeVar("GenericType") - - -def cache(func: Callable[..., GenericType]) -> GenericType: - """ mypy compatible annotation wrapper for lru_cache""" - return functools.lru_cache()(func) # type: ignore - - -def deprecated(reason: str) -> Callable: - """ - This is a decorator which can be used to mark functions - as deprecated. It will result in a warning being emitted - when the function is used. - """ - - def decorator(func1): - message = "Call to deprecated function {name} ({reason})." - - @functools.wraps(func1) - def new_func1(*args, **kwargs): - warnings.simplefilter("always", DeprecationWarning) - warnings.warn( - message.format(name=func1.__name__, reason=reason), - category=DeprecationWarning, - stacklevel=2, - ) - warnings.simplefilter("default", DeprecationWarning) - return func1(*args, **kwargs) - - return new_func1 - - return decorator - - -def target_directory(output_path: Optional[str] = None) -> str: - """ - Function for determining target directory of a download. - Returns an absolute path (if relative one given) or the current - path (if none given). Makes directory if it does not exist. - - :type output_path: str - :rtype: str - :returns: - An absolute directory path as a string. - """ - if output_path: - if not os.path.isabs(output_path): - output_path = os.path.join(os.getcwd(), output_path) - else: - output_path = os.getcwd() - os.makedirs(output_path, exist_ok=True) - return output_path - - -def install_proxy(proxy_handler: Dict[str, str]) -> None: - proxy_support = request.ProxyHandler(proxy_handler) - opener = request.build_opener(proxy_support) - request.install_opener(opener) - - -def uniqueify(duped_list: List) -> List: - """Remove duplicate items from a list, while maintaining list order. - - :param List duped_list - List to remove duplicates from - - :return List result - De-duplicated list - """ - seen: Dict[Any, bool] = {} - result = [] - for item in duped_list: - if item in seen: - continue - seen[item] = True - result.append(item) - return result - - -def generate_all_html_json_mocks(): - """Regenerate the video mock json files for all current test videos. - - This should automatically output to the test/mocks directory. - """ - test_vid_ids = [ - '2lAe1cqCOXo', - '5YceQ8YqYMc', - 'irauhITDrsE', - 'm8uHb5jIGN8', - 'QRS8MkLhQmM', - 'WXxV9g7lsFE' - ] - for vid_id in test_vid_ids: - create_mock_html_json(vid_id) - - -def create_mock_html_json(vid_id) -> Dict[str, Any]: - """Generate a json.gz file with sample html responses. - - :param str vid_id - YouTube video id - - :return dict data - Dict used to generate the json.gz file - """ - from pytube import YouTube - gzip_filename = 'yt-video-%s-html.json.gz' % vid_id - - # Get the pytube directory in order to navigate to /tests/mocks - pytube_dir_path = os.path.abspath( - os.path.join( - os.path.dirname(__file__), - os.path.pardir - ) - ) - pytube_mocks_path = os.path.join(pytube_dir_path, 'tests', 'mocks') - gzip_filepath = os.path.join(pytube_mocks_path, gzip_filename) - - yt = YouTube(f'https://www.youtube.com/watch?v={vid_id}') - html_data = { - 'url': yt.watch_url, - 'js': yt.js, - 'embed_html': yt.embed_html, - 'watch_html': yt.watch_html, - 'vid_info': yt.vid_info - } - - logger.info(f'Outputing json.gz file to {gzip_filepath}') - with gzip.open(gzip_filepath, 'wb') as f: - f.write(json.dumps(html_data).encode('utf-8')) - - return html_data diff --git a/lib/pytube/pytube/innertube.py b/lib/pytube/pytube/innertube.py deleted file mode 100644 index f1af0f6..0000000 --- a/lib/pytube/pytube/innertube.py +++ /dev/null @@ -1,507 +0,0 @@ -"""This module is designed to interact with the innertube API. - -This module is NOT intended to be used directly by end users, as each of the -interfaces returns raw results. These should instead be parsed to extract -the useful information for the end user. -""" -# Native python imports -import json -import os -import pathlib -import time -from urllib import parse - -# Local imports -from pytube import request - -# YouTube on TV client secrets -_client_id = '861556708454-d6dlm3lh05idd8npek18k6be8ba3oc68.apps.googleusercontent.com' -_client_secret = 'SboVhoG9s0rNafixCSGGKXAT' - -# Extracted API keys -- unclear what these are linked to. -_api_keys = [ - 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', - 'AIzaSyCtkvNIR1HCEwzsqK6JuE6KqpyjusIRI30', - 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w', - 'AIzaSyC8UYZpvA2eknNex0Pjid0_eTLJoDu6los', - 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw', - 'AIzaSyDHQ9ipnphqTzDqZsbtd8_Ru4_kiKVQe2k' -] - -_default_clients = { - 'WEB': { - 'context': { - 'client': { - 'clientName': 'WEB', - 'clientVersion': '2.20200720.00.02' - } - }, - 'header': { - 'User-Agent': 'Mozilla/5.0' - }, - 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' - }, - 'ANDROID': { - 'context': { - 'client': { - 'clientName': 'ANDROID', - 'clientVersion': '17.31.35', - 'androidSdkVersion': 30 - } - }, - 'header': { - 'User-Agent': 'com.google.android.youtube/', - }, - 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' - }, - 'IOS': { - 'context': { - 'client': { - 'clientName': 'IOS', - 'clientVersion': '17.33.2', - 'deviceModel': 'iPhone14,3' - } - }, - 'header': { - 'User-Agent': 'com.google.ios.youtube/' - }, - 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' - }, - - 'WEB_EMBED': { - 'context': { - 'client': { - 'clientName': 'WEB_EMBEDDED_PLAYER', - 'clientVersion': '2.20210721.00.00', - 'clientScreen': 'EMBED' - } - }, - 'header': { - 'User-Agent': 'Mozilla/5.0' - }, - 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' - }, - 'ANDROID_EMBED': { - 'context': { - 'client': { - 'clientName': 'ANDROID_EMBEDDED_PLAYER', - 'clientVersion': '17.31.35', - 'clientScreen': 'EMBED', - 'androidSdkVersion': 30, - } - }, - 'header': { - 'User-Agent': 'com.google.android.youtube/' - }, - 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' - }, - 'IOS_EMBED': { - 'context': { - 'client': { - 'clientName': 'IOS_MESSAGES_EXTENSION', - 'clientVersion': '17.33.2', - 'deviceModel': 'iPhone14,3' - } - }, - 'header': { - 'User-Agent': 'com.google.ios.youtube/' - }, - 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' - }, - - 'WEB_MUSIC': { - 'context': { - 'client': { - 'clientName': 'WEB_REMIX', - 'clientVersion': '1.20220727.01.00', - } - }, - 'header': { - 'User-Agent': 'Mozilla/5.0' - }, - 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' - }, - 'ANDROID_MUSIC': { - 'context': { - 'client': { - 'clientName': 'ANDROID_MUSIC', - 'clientVersion': '5.16.51', - 'androidSdkVersion': 30 - } - }, - 'header': { - 'User-Agent': 'com.google.android.apps.youtube.music/' - }, - 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' - }, - 'IOS_MUSIC': { - 'context': { - 'client': { - 'clientName': 'IOS_MUSIC', - 'clientVersion': '5.21', - 'deviceModel': 'iPhone14,3' - } - }, - 'header': { - 'User-Agent': 'com.google.ios.youtubemusic/' - }, - 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' - }, - - 'WEB_CREATOR': { - 'context': { - 'client': { - 'clientName': 'WEB_CREATOR', - 'clientVersion': '1.20220726.00.00', - } - }, - 'header': { - 'User-Agent': 'Mozilla/5.0' - }, - 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' - }, - 'ANDROID_CREATOR': { - 'context': { - 'client': { - 'clientName': 'ANDROID_CREATOR', - 'clientVersion': '22.30.100', - 'androidSdkVersion': 30, - } - }, - 'header': { - 'User-Agent': 'com.google.android.apps.youtube.creator/', - }, - 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' - }, - 'IOS_CREATOR': { - 'context': { - 'client': { - 'clientName': 'IOS_CREATOR', - 'clientVersion': '22.33.101', - 'deviceModel': 'iPhone14,3', - } - }, - 'header': { - 'User-Agent': 'com.google.ios.ytcreator/' - }, - 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' - }, - - 'MWEB': { - 'context': { - 'client': { - 'clientName': 'MWEB', - 'clientVersion': '2.20220801.00.00', - } - }, - 'header': { - 'User-Agent': 'Mozilla/5.0' - }, - 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' - }, - - 'TV_EMBED': { - 'context': { - 'client': { - 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', - 'clientVersion': '2.0', - } - }, - 'header': { - 'User-Agent': 'Mozilla/5.0' - }, - 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' - }, -} -_token_timeout = 1800 -_cache_dir = pathlib.Path(__file__).parent.resolve() / '__cache__' -_token_file = os.path.join(_cache_dir, 'tokens.json') - - -class InnerTube: - """Object for interacting with the innertube API.""" - def __init__(self, client='ANDROID_MUSIC', use_oauth=False, allow_cache=True): - """Initialize an InnerTube object. - - :param str client: - Client to use for the object. - Default to web because it returns the most playback types. - :param bool use_oauth: - Whether or not to authenticate to YouTube. - :param bool allow_cache: - Allows caching of oauth tokens on the machine. - """ - self.context = _default_clients[client]['context'] - self.header = _default_clients[client]['header'] - self.api_key = _default_clients[client]['api_key'] - self.access_token = None - self.refresh_token = None - self.use_oauth = use_oauth - self.allow_cache = allow_cache - - # Stored as epoch time - self.expires = None - - # Try to load from file if specified - if self.use_oauth and self.allow_cache: - # Try to load from file if possible - if os.path.exists(_token_file): - with open(_token_file) as f: - data = json.load(f) - self.access_token = data['access_token'] - self.refresh_token = data['refresh_token'] - self.expires = data['expires'] - self.refresh_bearer_token() - - def cache_tokens(self): - """Cache tokens to file if allowed.""" - if not self.allow_cache: - return - - data = { - 'access_token': self.access_token, - 'refresh_token': self.refresh_token, - 'expires': self.expires - } - if not os.path.exists(_cache_dir): - os.mkdir(_cache_dir) - with open(_token_file, 'w') as f: - json.dump(data, f) - - def refresh_bearer_token(self, force=False): - """Refreshes the OAuth token if necessary. - - :param bool force: - Force-refresh the bearer token. - """ - if not self.use_oauth: - return - # Skip refresh if it's not necessary and not forced - if self.expires > time.time() and not force: - return - - # Subtracting 30 seconds is arbitrary to avoid potential time discrepencies - start_time = int(time.time() - 30) - data = { - 'client_id': _client_id, - 'client_secret': _client_secret, - 'grant_type': 'refresh_token', - 'refresh_token': self.refresh_token - } - response = request._execute_request( - 'https://oauth2.googleapis.com/token', - 'POST', - headers={ - 'Content-Type': 'application/json' - }, - data=data - ) - response_data = json.loads(response.read()) - - self.access_token = response_data['access_token'] - self.expires = start_time + response_data['expires_in'] - self.cache_tokens() - - def fetch_bearer_token(self): - """Fetch an OAuth token.""" - # Subtracting 30 seconds is arbitrary to avoid potential time discrepencies - start_time = int(time.time() - 30) - data = { - 'client_id': _client_id, - 'scope': 'https://www.googleapis.com/auth/youtube' - } - response = request._execute_request( - 'https://oauth2.googleapis.com/device/code', - 'POST', - headers={ - 'Content-Type': 'application/json' - }, - data=data - ) - response_data = json.loads(response.read()) - verification_url = response_data['verification_url'] - user_code = response_data['user_code'] - print(f'Please open {verification_url} and input code {user_code}') - input('Press enter when you have completed this step.') - - data = { - 'client_id': _client_id, - 'client_secret': _client_secret, - 'device_code': response_data['device_code'], - 'grant_type': 'urn:ietf:params:oauth:grant-type:device_code' - } - response = request._execute_request( - 'https://oauth2.googleapis.com/token', - 'POST', - headers={ - 'Content-Type': 'application/json' - }, - data=data - ) - response_data = json.loads(response.read()) - - self.access_token = response_data['access_token'] - self.refresh_token = response_data['refresh_token'] - self.expires = start_time + response_data['expires_in'] - self.cache_tokens() - - @property - def base_url(self): - """Return the base url endpoint for the innertube API.""" - return 'https://www.youtube.com/youtubei/v1' - - @property - def base_data(self): - """Return the base json data to transmit to the innertube API.""" - return { - 'context': self.context - } - - @property - def base_params(self): - """Return the base query parameters to transmit to the innertube API.""" - return { - 'key': self.api_key, - 'contentCheckOk': True, - 'racyCheckOk': True - } - - def _call_api(self, endpoint, query, data): - """Make a request to a given endpoint with the provided query parameters and data.""" - # Remove the API key if oauth is being used. - if self.use_oauth: - del query['key'] - - endpoint_url = f'{endpoint}?{parse.urlencode(query)}' - headers = { - 'Content-Type': 'application/json', - } - # Add the bearer token if applicable - if self.use_oauth: - if self.access_token: - self.refresh_bearer_token() - headers['Authorization'] = f'Bearer {self.access_token}' - else: - self.fetch_bearer_token() - headers['Authorization'] = f'Bearer {self.access_token}' - - headers.update(self.header) - - response = request._execute_request( - endpoint_url, - 'POST', - headers=headers, - data=data - ) - return json.loads(response.read()) - - def browse(self): - """Make a request to the browse endpoint. - - TODO: Figure out how we can use this - """ - # endpoint = f'{self.base_url}/browse' # noqa:E800 - ... - # return self._call_api(endpoint, query, self.base_data) # noqa:E800 - - def config(self): - """Make a request to the config endpoint. - - TODO: Figure out how we can use this - """ - # endpoint = f'{self.base_url}/config' # noqa:E800 - ... - # return self._call_api(endpoint, query, self.base_data) # noqa:E800 - - def guide(self): - """Make a request to the guide endpoint. - - TODO: Figure out how we can use this - """ - # endpoint = f'{self.base_url}/guide' # noqa:E800 - ... - # return self._call_api(endpoint, query, self.base_data) # noqa:E800 - - def next(self): - """Make a request to the next endpoint. - - TODO: Figure out how we can use this - """ - # endpoint = f'{self.base_url}/next' # noqa:E800 - ... - # return self._call_api(endpoint, query, self.base_data) # noqa:E800 - - def player(self, video_id): - """Make a request to the player endpoint. - - :param str video_id: - The video id to get player info for. - :rtype: dict - :returns: - Raw player info results. - """ - endpoint = f'{self.base_url}/player' - query = { - 'videoId': video_id, - } - query.update(self.base_params) - return self._call_api(endpoint, query, self.base_data) - - def search(self, search_query, continuation=None): - """Make a request to the search endpoint. - - :param str search_query: - The query to search. - :rtype: dict - :returns: - Raw search query results. - """ - endpoint = f'{self.base_url}/search' - query = { - 'query': search_query - } - query.update(self.base_params) - data = {} - if continuation: - data['continuation'] = continuation - data.update(self.base_data) - return self._call_api(endpoint, query, data) - - def verify_age(self, video_id): - """Make a request to the age_verify endpoint. - - Notable examples of the types of video this verification step is for: - * https://www.youtube.com/watch?v=QLdAhwSBZ3w - * https://www.youtube.com/watch?v=hc0ZDaAZQT0 - - :param str video_id: - The video id to get player info for. - :rtype: dict - :returns: - Returns information that includes a URL for bypassing certain restrictions. - """ - endpoint = f'{self.base_url}/verify_age' - data = { - 'nextEndpoint': { - 'urlEndpoint': { - 'url': f'/watch?v={video_id}' - } - }, - 'setControvercy': True - } - data.update(self.base_data) - result = self._call_api(endpoint, self.base_params, data) - return result - - def get_transcript(self, video_id): - """Make a request to the get_transcript endpoint. - - This is likely related to captioning for videos, but is currently untested. - """ - endpoint = f'{self.base_url}/get_transcript' - query = { - 'videoId': video_id, - } - query.update(self.base_params) - result = self._call_api(endpoint, query, self.base_data) - return result diff --git a/lib/pytube/pytube/itags.py b/lib/pytube/pytube/itags.py deleted file mode 100644 index 87536b1..0000000 --- a/lib/pytube/pytube/itags.py +++ /dev/null @@ -1,153 +0,0 @@ -"""This module contains a lookup table of YouTube's itag values.""" -from typing import Dict - -PROGRESSIVE_VIDEO = { - 5: ("240p", "64kbps"), - 6: ("270p", "64kbps"), - 13: ("144p", None), - 17: ("144p", "24kbps"), - 18: ("360p", "96kbps"), - 22: ("720p", "192kbps"), - 34: ("360p", "128kbps"), - 35: ("480p", "128kbps"), - 36: ("240p", None), - 37: ("1080p", "192kbps"), - 38: ("3072p", "192kbps"), - 43: ("360p", "128kbps"), - 44: ("480p", "128kbps"), - 45: ("720p", "192kbps"), - 46: ("1080p", "192kbps"), - 59: ("480p", "128kbps"), - 78: ("480p", "128kbps"), - 82: ("360p", "128kbps"), - 83: ("480p", "128kbps"), - 84: ("720p", "192kbps"), - 85: ("1080p", "192kbps"), - 91: ("144p", "48kbps"), - 92: ("240p", "48kbps"), - 93: ("360p", "128kbps"), - 94: ("480p", "128kbps"), - 95: ("720p", "256kbps"), - 96: ("1080p", "256kbps"), - 100: ("360p", "128kbps"), - 101: ("480p", "192kbps"), - 102: ("720p", "192kbps"), - 132: ("240p", "48kbps"), - 151: ("720p", "24kbps"), - 300: ("720p", "128kbps"), - 301: ("1080p", "128kbps"), -} - -DASH_VIDEO = { - # DASH Video - 133: ("240p", None), # MP4 - 134: ("360p", None), # MP4 - 135: ("480p", None), # MP4 - 136: ("720p", None), # MP4 - 137: ("1080p", None), # MP4 - 138: ("2160p", None), # MP4 - 160: ("144p", None), # MP4 - 167: ("360p", None), # WEBM - 168: ("480p", None), # WEBM - 169: ("720p", None), # WEBM - 170: ("1080p", None), # WEBM - 212: ("480p", None), # MP4 - 218: ("480p", None), # WEBM - 219: ("480p", None), # WEBM - 242: ("240p", None), # WEBM - 243: ("360p", None), # WEBM - 244: ("480p", None), # WEBM - 245: ("480p", None), # WEBM - 246: ("480p", None), # WEBM - 247: ("720p", None), # WEBM - 248: ("1080p", None), # WEBM - 264: ("1440p", None), # MP4 - 266: ("2160p", None), # MP4 - 271: ("1440p", None), # WEBM - 272: ("4320p", None), # WEBM - 278: ("144p", None), # WEBM - 298: ("720p", None), # MP4 - 299: ("1080p", None), # MP4 - 302: ("720p", None), # WEBM - 303: ("1080p", None), # WEBM - 308: ("1440p", None), # WEBM - 313: ("2160p", None), # WEBM - 315: ("2160p", None), # WEBM - 330: ("144p", None), # WEBM - 331: ("240p", None), # WEBM - 332: ("360p", None), # WEBM - 333: ("480p", None), # WEBM - 334: ("720p", None), # WEBM - 335: ("1080p", None), # WEBM - 336: ("1440p", None), # WEBM - 337: ("2160p", None), # WEBM - 394: ("144p", None), # MP4 - 395: ("240p", None), # MP4 - 396: ("360p", None), # MP4 - 397: ("480p", None), # MP4 - 398: ("720p", None), # MP4 - 399: ("1080p", None), # MP4 - 400: ("1440p", None), # MP4 - 401: ("2160p", None), # MP4 - 402: ("4320p", None), # MP4 - 571: ("4320p", None), # MP4 - 694: ("144p", None), # MP4 - 695: ("240p", None), # MP4 - 696: ("360p", None), # MP4 - 697: ("480p", None), # MP4 - 698: ("720p", None), # MP4 - 699: ("1080p", None), # MP4 - 700: ("1440p", None), # MP4 - 701: ("2160p", None), # MP4 - 702: ("4320p", None), # MP4 -} - -DASH_AUDIO = { - # DASH Audio - 139: (None, "48kbps"), # MP4 - 140: (None, "128kbps"), # MP4 - 141: (None, "256kbps"), # MP4 - 171: (None, "128kbps"), # WEBM - 172: (None, "256kbps"), # WEBM - 249: (None, "50kbps"), # WEBM - 250: (None, "70kbps"), # WEBM - 251: (None, "160kbps"), # WEBM - 256: (None, "192kbps"), # MP4 - 258: (None, "384kbps"), # MP4 - 325: (None, None), # MP4 - 328: (None, None), # MP4 -} - -ITAGS = { - **PROGRESSIVE_VIDEO, - **DASH_VIDEO, - **DASH_AUDIO, -} - -HDR = [330, 331, 332, 333, 334, 335, 336, 337] -_3D = [82, 83, 84, 85, 100, 101, 102] -LIVE = [91, 92, 93, 94, 95, 96, 132, 151] - - -def get_format_profile(itag: int) -> Dict: - """Get additional format information for a given itag. - - :param str itag: - YouTube format identifier code. - """ - itag = int(itag) - if itag in ITAGS: - res, bitrate = ITAGS[itag] - else: - res, bitrate = None, None - return { - "resolution": res, - "abr": bitrate, - "is_live": itag in LIVE, - "is_3d": itag in _3D, - "is_hdr": itag in HDR, - "is_dash": ( - itag in DASH_AUDIO - or itag in DASH_VIDEO - ), - } diff --git a/lib/pytube/pytube/metadata.py b/lib/pytube/pytube/metadata.py deleted file mode 100644 index be12c63..0000000 --- a/lib/pytube/pytube/metadata.py +++ /dev/null @@ -1,48 +0,0 @@ -"""This module contains the YouTubeMetadata class.""" -import json -from typing import Dict, List, Optional - - -class YouTubeMetadata: - def __init__(self, metadata: List): - self._raw_metadata: List = metadata - self._metadata = [{}] - - for el in metadata: - # We only add metadata to the dict if it has a simpleText title. - if 'title' in el and 'simpleText' in el['title']: - metadata_title = el['title']['simpleText'] - else: - continue - - contents = el['contents'][0] - if 'simpleText' in contents: - self._metadata[-1][metadata_title] = contents['simpleText'] - elif 'runs' in contents: - self._metadata[-1][metadata_title] = contents['runs'][0]['text'] - - # Upon reaching a dividing line, create a new grouping - if el.get('hasDividerLine', False): - self._metadata.append({}) - - # If we happen to create an empty dict at the end, drop it - if self._metadata[-1] == {}: - self._metadata = self._metadata[:-1] - - def __getitem__(self, key): - return self._metadata[key] - - def __iter__(self): - for el in self._metadata: - yield el - - def __str__(self): - return json.dumps(self._metadata) - - @property - def raw_metadata(self) -> Optional[Dict]: - return self._raw_metadata - - @property - def metadata(self): - return self._metadata diff --git a/lib/pytube/pytube/monostate.py b/lib/pytube/pytube/monostate.py deleted file mode 100644 index 7968af5..0000000 --- a/lib/pytube/pytube/monostate.py +++ /dev/null @@ -1,15 +0,0 @@ -from typing import Any, Callable, Optional - - -class Monostate: - def __init__( - self, - on_progress: Optional[Callable[[Any, bytes, int], None]], - on_complete: Optional[Callable[[Any, Optional[str]], None]], - title: Optional[str] = None, - duration: Optional[int] = None, - ): - self.on_progress = on_progress - self.on_complete = on_complete - self.title = title - self.duration = duration diff --git a/lib/pytube/pytube/parser.py b/lib/pytube/pytube/parser.py deleted file mode 100644 index fbf27b3..0000000 --- a/lib/pytube/pytube/parser.py +++ /dev/null @@ -1,185 +0,0 @@ -import ast -import json -import re -from pytube.exceptions import HTMLParseError - - -def parse_for_all_objects(html, preceding_regex): - """Parses input html to find all matches for the input starting point. - - :param str html: - HTML to be parsed for an object. - :param str preceding_regex: - Regex to find the string preceding the object. - :rtype list: - :returns: - A list of dicts created from parsing the objects. - """ - result = [] - regex = re.compile(preceding_regex) - match_iter = regex.finditer(html) - for match in match_iter: - if match: - start_index = match.end() - try: - obj = parse_for_object_from_startpoint(html, start_index) - except HTMLParseError: - # Some of the instances might fail because set is technically - # a method of the ytcfg object. We'll skip these since they - # don't seem relevant at the moment. - continue - else: - result.append(obj) - - if len(result) == 0: - raise HTMLParseError(f'No matches for regex {preceding_regex}') - - return result - - -def parse_for_object(html, preceding_regex): - """Parses input html to find the end of a JavaScript object. - - :param str html: - HTML to be parsed for an object. - :param str preceding_regex: - Regex to find the string preceding the object. - :rtype dict: - :returns: - A dict created from parsing the object. - """ - regex = re.compile(preceding_regex) - result = regex.search(html) - if not result: - raise HTMLParseError(f'No matches for regex {preceding_regex}') - - start_index = result.end() - return parse_for_object_from_startpoint(html, start_index) - - -def find_object_from_startpoint(html, start_point): - """Parses input html to find the end of a JavaScript object. - - :param str html: - HTML to be parsed for an object. - :param int start_point: - Index of where the object starts. - :rtype dict: - :returns: - A dict created from parsing the object. - """ - html = html[start_point:] - if html[0] not in ['{','[']: - raise HTMLParseError(f'Invalid start point. Start of HTML:\n{html[:20]}') - - # First letter MUST be a open brace, so we put that in the stack, - # and skip the first character. - last_char = '{' - curr_char = None - stack = [html[0]] - i = 1 - - context_closers = { - '{': '}', - '[': ']', - '"': '"', - '/': '/' # javascript regex - } - - while i < len(html): - if len(stack) == 0: - break - if curr_char not in [' ', '\n']: - last_char = curr_char - curr_char = html[i] - curr_context = stack[-1] - - # If we've reached a context closer, we can remove an element off the stack - if curr_char == context_closers[curr_context]: - stack.pop() - i += 1 - continue - - # Strings and regex expressions require special context handling because they can contain - # context openers *and* closers - if curr_context in ['"', '/']: - # If there's a backslash in a string or regex expression, we skip a character - if curr_char == '\\': - i += 2 - continue - else: - # Non-string contexts are when we need to look for context openers. - if curr_char in context_closers.keys(): - # Slash starts a regular expression depending on context - if not (curr_char == '/' and last_char not in ['(', ',', '=', ':', '[', '!', '&', '|', '?', '{', '}', ';']): - stack.append(curr_char) - - i += 1 - - full_obj = html[:i] - return full_obj # noqa: R504 - - -def parse_for_object_from_startpoint(html, start_point): - """JSONifies an object parsed from HTML. - - :param str html: - HTML to be parsed for an object. - :param int start_point: - Index of where the object starts. - :rtype dict: - :returns: - A dict created from parsing the object. - """ - full_obj = find_object_from_startpoint(html, start_point) - try: - return json.loads(full_obj) - except json.decoder.JSONDecodeError: - try: - return ast.literal_eval(full_obj) - except (ValueError, SyntaxError): - raise HTMLParseError('Could not parse object.') - - -def throttling_array_split(js_array): - """Parses the throttling array into a python list of strings. - - Expects input to begin with `[` and close with `]`. - - :param str js_array: - The javascript array, as a string. - :rtype: list: - :returns: - A list of strings representing splits on `,` in the throttling array. - """ - results = [] - curr_substring = js_array[1:] - - comma_regex = re.compile(r",") - func_regex = re.compile(r"function\([^)]*\)") - - while len(curr_substring) > 0: - if curr_substring.startswith('function'): - # Handle functions separately. These can contain commas - match = func_regex.search(curr_substring) - match_start, match_end = match.span() - - function_text = find_object_from_startpoint(curr_substring, match.span()[1]) - full_function_def = curr_substring[:match_end + len(function_text)] - results.append(full_function_def) - curr_substring = curr_substring[len(full_function_def) + 1:] - else: - match = comma_regex.search(curr_substring) - - # Try-catch to capture end of array - try: - match_start, match_end = match.span() - except AttributeError: - match_start = len(curr_substring) - 1 - match_end = match_start + 1 - - curr_el = curr_substring[:match_start] - results.append(curr_el) - curr_substring = curr_substring[match_end:] - - return results diff --git a/lib/pytube/pytube/query.py b/lib/pytube/pytube/query.py deleted file mode 100644 index 72d2391..0000000 --- a/lib/pytube/pytube/query.py +++ /dev/null @@ -1,424 +0,0 @@ -"""This module provides a query interface for media streams and captions.""" -from collections.abc import Mapping, Sequence -from typing import Callable, List, Optional, Union - -from pytube import Caption, Stream -from pytube.helpers import deprecated - - -class StreamQuery(Sequence): - """Interface for querying the available media streams.""" - - def __init__(self, fmt_streams): - """Construct a :class:`StreamQuery `. - - param list fmt_streams: - list of :class:`Stream ` instances. - """ - self.fmt_streams = fmt_streams - self.itag_index = {int(s.itag): s for s in fmt_streams} - - def filter( - self, - fps=None, - res=None, - resolution=None, - mime_type=None, - type=None, - subtype=None, - file_extension=None, - abr=None, - bitrate=None, - video_codec=None, - audio_codec=None, - only_audio=None, - only_video=None, - progressive=None, - adaptive=None, - is_dash=None, - custom_filter_functions=None, - ): - """Apply the given filtering criterion. - - :param fps: - (optional) The frames per second. - :type fps: - int or None - - :param resolution: - (optional) Alias to ``res``. - :type res: - str or None - - :param res: - (optional) The video resolution. - :type resolution: - str or None - - :param mime_type: - (optional) Two-part identifier for file formats and format contents - composed of a "type", a "subtype". - :type mime_type: - str or None - - :param type: - (optional) Type part of the ``mime_type`` (e.g.: audio, video). - :type type: - str or None - - :param subtype: - (optional) Sub-type part of the ``mime_type`` (e.g.: mp4, mov). - :type subtype: - str or None - - :param file_extension: - (optional) Alias to ``sub_type``. - :type file_extension: - str or None - - :param abr: - (optional) Average bitrate (ABR) refers to the average amount of - data transferred per unit of time (e.g.: 64kbps, 192kbps). - :type abr: - str or None - - :param bitrate: - (optional) Alias to ``abr``. - :type bitrate: - str or None - - :param video_codec: - (optional) Video compression format. - :type video_codec: - str or None - - :param audio_codec: - (optional) Audio compression format. - :type audio_codec: - str or None - - :param bool progressive: - Excludes adaptive streams (one file contains both audio and video - tracks). - - :param bool adaptive: - Excludes progressive streams (audio and video are on separate - tracks). - - :param bool is_dash: - Include/exclude dash streams. - - :param bool only_audio: - Excludes streams with video tracks. - - :param bool only_video: - Excludes streams with audio tracks. - - :param custom_filter_functions: - (optional) Interface for defining complex filters without - subclassing. - :type custom_filter_functions: - list or None - - """ - filters = [] - if res or resolution: - if isinstance(res, str) or isinstance(resolution, str): - filters.append(lambda s: s.resolution == (res or resolution)) - elif isinstance(res, list) or isinstance(resolution, list): - filters.append(lambda s: s.resolution in (res or resolution)) - - if fps: - filters.append(lambda s: s.fps == fps) - - if mime_type: - filters.append(lambda s: s.mime_type == mime_type) - - if type: - filters.append(lambda s: s.type == type) - - if subtype or file_extension: - filters.append(lambda s: s.subtype == (subtype or file_extension)) - - if abr or bitrate: - filters.append(lambda s: s.abr == (abr or bitrate)) - - if video_codec: - filters.append(lambda s: s.video_codec == video_codec) - - if audio_codec: - filters.append(lambda s: s.audio_codec == audio_codec) - - if only_audio: - filters.append( - lambda s: ( - s.includes_audio_track and not s.includes_video_track - ), - ) - - if only_video: - filters.append( - lambda s: ( - s.includes_video_track and not s.includes_audio_track - ), - ) - - if progressive: - filters.append(lambda s: s.is_progressive) - - if adaptive: - filters.append(lambda s: s.is_adaptive) - - if custom_filter_functions: - filters.extend(custom_filter_functions) - - if is_dash is not None: - filters.append(lambda s: s.is_dash == is_dash) - - return self._filter(filters) - - def _filter(self, filters: List[Callable]) -> "StreamQuery": - fmt_streams = self.fmt_streams - for filter_lambda in filters: - fmt_streams = filter(filter_lambda, fmt_streams) - return StreamQuery(list(fmt_streams)) - - def order_by(self, attribute_name: str) -> "StreamQuery": - """Apply a sort order. Filters out stream the do not have the attribute. - - :param str attribute_name: - The name of the attribute to sort by. - """ - has_attribute = [ - s - for s in self.fmt_streams - if getattr(s, attribute_name) is not None - ] - # Check that the attributes have string values. - if has_attribute and isinstance( - getattr(has_attribute[0], attribute_name), str - ): - # Try to return a StreamQuery sorted by the integer representations - # of the values. - try: - return StreamQuery( - sorted( - has_attribute, - key=lambda s: int( - "".join( - filter(str.isdigit, getattr(s, attribute_name)) - ) - ), # type: ignore # noqa: E501 - ) - ) - except ValueError: - pass - - return StreamQuery( - sorted(has_attribute, key=lambda s: getattr(s, attribute_name)) - ) - - def desc(self) -> "StreamQuery": - """Sort streams in descending order. - - :rtype: :class:`StreamQuery ` - - """ - return StreamQuery(self.fmt_streams[::-1]) - - def asc(self) -> "StreamQuery": - """Sort streams in ascending order. - - :rtype: :class:`StreamQuery ` - - """ - return self - - def get_by_itag(self, itag: int) -> Optional[Stream]: - """Get the corresponding :class:`Stream ` for a given itag. - - :param int itag: - YouTube format identifier code. - :rtype: :class:`Stream ` or None - :returns: - The :class:`Stream ` matching the given itag or None if - not found. - - """ - return self.itag_index.get(int(itag)) - - def get_by_resolution(self, resolution: str) -> Optional[Stream]: - """Get the corresponding :class:`Stream ` for a given resolution. - - Stream must be a progressive mp4. - - :param str resolution: - Video resolution i.e. "720p", "480p", "360p", "240p", "144p" - :rtype: :class:`Stream ` or None - :returns: - The :class:`Stream ` matching the given itag or None if - not found. - - """ - return self.filter( - progressive=True, subtype="mp4", resolution=resolution - ).first() - - def get_lowest_resolution(self) -> Optional[Stream]: - """Get lowest resolution stream that is a progressive mp4. - - :rtype: :class:`Stream ` or None - :returns: - The :class:`Stream ` matching the given itag or None if - not found. - - """ - return ( - self.filter(progressive=True, subtype="mp4") - .order_by("resolution") - .first() - ) - - def get_highest_resolution(self) -> Optional[Stream]: - """Get highest resolution stream that is a progressive video. - - :rtype: :class:`Stream ` or None - :returns: - The :class:`Stream ` matching the given itag or None if - not found. - - """ - return self.filter(progressive=True).order_by("resolution").last() - - def get_audio_only(self, subtype: str = "mp4") -> Optional[Stream]: - """Get highest bitrate audio stream for given codec (defaults to mp4) - - :param str subtype: - Audio subtype, defaults to mp4 - :rtype: :class:`Stream ` or None - :returns: - The :class:`Stream ` matching the given itag or None if - not found. - """ - return ( - self.filter(only_audio=True, subtype=subtype) - .order_by("abr") - .last() - ) - - def otf(self, is_otf: bool = False) -> "StreamQuery": - """Filter stream by OTF, useful if some streams have 404 URLs - - :param bool is_otf: Set to False to retrieve only non-OTF streams - :rtype: :class:`StreamQuery ` - :returns: A StreamQuery object with otf filtered streams - """ - return self._filter([lambda s: s.is_otf == is_otf]) - - def first(self) -> Optional[Stream]: - """Get the first :class:`Stream ` in the results. - - :rtype: :class:`Stream ` or None - :returns: - the first result of this query or None if the result doesn't - contain any streams. - - """ - try: - return self.fmt_streams[0] - except IndexError: - return None - - def last(self): - """Get the last :class:`Stream ` in the results. - - :rtype: :class:`Stream ` or None - :returns: - Return the last result of this query or None if the result - doesn't contain any streams. - - """ - try: - return self.fmt_streams[-1] - except IndexError: - pass - - @deprecated("Get the size of this list directly using len()") - def count(self, value: Optional[str] = None) -> int: # pragma: no cover - """Get the count of items in the list. - - :rtype: int - """ - if value: - return self.fmt_streams.count(value) - - return len(self) - - @deprecated("This object can be treated as a list, all() is useless") - def all(self) -> List[Stream]: # pragma: no cover - """Get all the results represented by this query as a list. - - :rtype: list - - """ - return self.fmt_streams - - def __getitem__(self, i: Union[slice, int]): - return self.fmt_streams[i] - - def __len__(self) -> int: - return len(self.fmt_streams) - - def __repr__(self) -> str: - return f"{self.fmt_streams}" - - -class CaptionQuery(Mapping): - """Interface for querying the available captions.""" - - def __init__(self, captions: List[Caption]): - """Construct a :class:`Caption `. - - param list captions: - list of :class:`Caption ` instances. - - """ - self.lang_code_index = {c.code: c for c in captions} - - @deprecated( - "This object can be treated as a dictionary, i.e. captions['en']" - ) - def get_by_language_code( - self, lang_code: str - ) -> Optional[Caption]: # pragma: no cover - """Get the :class:`Caption ` for a given ``lang_code``. - - :param str lang_code: - The code that identifies the caption language. - :rtype: :class:`Caption ` or None - :returns: - The :class:`Caption ` matching the given ``lang_code`` or - None if it does not exist. - """ - return self.lang_code_index.get(lang_code) - - @deprecated("This object can be treated as a dictionary") - def all(self) -> List[Caption]: # pragma: no cover - """Get all the results represented by this query as a list. - - :rtype: list - - """ - return list(self.lang_code_index.values()) - - def __getitem__(self, i: str): - return self.lang_code_index[i] - - def __len__(self) -> int: - return len(self.lang_code_index) - - def __iter__(self): - return iter(self.lang_code_index.values()) - - def __repr__(self) -> str: - return f"{self.lang_code_index}" diff --git a/lib/pytube/pytube/request.py b/lib/pytube/pytube/request.py deleted file mode 100644 index df1f41c..0000000 --- a/lib/pytube/pytube/request.py +++ /dev/null @@ -1,269 +0,0 @@ -"""Implements a simple wrapper around urlopen.""" -import http.client -import json -import logging -import re -import socket -from functools import lru_cache -from urllib import parse -from urllib.error import URLError -from urllib.request import Request, urlopen - -from pytube.exceptions import RegexMatchError, MaxRetriesExceeded -from pytube.helpers import regex_search - -logger = logging.getLogger(__name__) -default_range_size = 9437184 # 9MB - - -def _execute_request( - url, - method=None, - headers=None, - data=None, - timeout=socket._GLOBAL_DEFAULT_TIMEOUT -): - base_headers = {"User-Agent": "Mozilla/5.0", "accept-language": "en-US,en"} - if headers: - base_headers.update(headers) - if data: - # encode data for request - if not isinstance(data, bytes): - data = bytes(json.dumps(data), encoding="utf-8") - if url.lower().startswith("http"): - request = Request(url, headers=base_headers, method=method, data=data) - else: - raise ValueError("Invalid URL") - return urlopen(request, timeout=timeout) # nosec - - -def get(url, extra_headers=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): - """Send an http GET request. - - :param str url: - The URL to perform the GET request for. - :param dict extra_headers: - Extra headers to add to the request - :rtype: str - :returns: - UTF-8 encoded string of response - """ - if extra_headers is None: - extra_headers = {} - response = _execute_request(url, headers=extra_headers, timeout=timeout) - return response.read().decode("utf-8") - - -def post(url, extra_headers=None, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): - """Send an http POST request. - - :param str url: - The URL to perform the POST request for. - :param dict extra_headers: - Extra headers to add to the request - :param dict data: - The data to send on the POST request - :rtype: str - :returns: - UTF-8 encoded string of response - """ - # could technically be implemented in get, - # but to avoid confusion implemented like this - if extra_headers is None: - extra_headers = {} - if data is None: - data = {} - # required because the youtube servers are strict on content type - # raises HTTPError [400]: Bad Request otherwise - extra_headers.update({"Content-Type": "application/json"}) - response = _execute_request( - url, - headers=extra_headers, - data=data, - timeout=timeout - ) - return response.read().decode("utf-8") - - -def seq_stream( - url, - timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - max_retries=0 -): - """Read the response in sequence. - :param str url: The URL to perform the GET request for. - :rtype: Iterable[bytes] - """ - # YouTube expects a request sequence number as part of the parameters. - split_url = parse.urlsplit(url) - base_url = '%s://%s/%s?' % (split_url.scheme, split_url.netloc, split_url.path) - - querys = dict(parse.parse_qsl(split_url.query)) - - # The 0th sequential request provides the file headers, which tell us - # information about how the file is segmented. - querys['sq'] = 0 - url = base_url + parse.urlencode(querys) - - segment_data = b'' - for chunk in stream(url, timeout=timeout, max_retries=max_retries): - yield chunk - segment_data += chunk - - # We can then parse the header to find the number of segments - stream_info = segment_data.split(b'\r\n') - segment_count_pattern = re.compile(b'Segment-Count: (\\d+)') - for line in stream_info: - match = segment_count_pattern.search(line) - if match: - segment_count = int(match.group(1).decode('utf-8')) - - # We request these segments sequentially to build the file. - seq_num = 1 - while seq_num <= segment_count: - # Create sequential request URL - querys['sq'] = seq_num - url = base_url + parse.urlencode(querys) - - yield from stream(url, timeout=timeout, max_retries=max_retries) - seq_num += 1 - return # pylint: disable=R1711 - - -def stream( - url, - timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - max_retries=0 -): - """Read the response in chunks. - :param str url: The URL to perform the GET request for. - :rtype: Iterable[bytes] - """ - file_size: int = default_range_size # fake filesize to start - downloaded = 0 - while downloaded < file_size: - stop_pos = min(downloaded + default_range_size, file_size) - 1 - range_header = f"bytes={downloaded}-{stop_pos}" - tries = 0 - - # Attempt to make the request multiple times as necessary. - while True: - # If the max retries is exceeded, raise an exception - if tries >= 1 + max_retries: - raise MaxRetriesExceeded() - - # Try to execute the request, ignoring socket timeouts - try: - response = _execute_request( - url + f"&range={downloaded}-{stop_pos}", - method="GET", - timeout=timeout - ) - except URLError as e: - # We only want to skip over timeout errors, and - # raise any other URLError exceptions - if isinstance(e.reason, socket.timeout): - pass - else: - raise - except http.client.IncompleteRead: - # Allow retries on IncompleteRead errors for unreliable connections - pass - else: - # On a successful request, break from loop - break - tries += 1 - - if file_size == default_range_size: - try: - resp = _execute_request( - url + f"&range={0}-{99999999999}", - method="GET", - timeout=timeout - ) - content_range = resp.info()["Content-Length"] - file_size = int(content_range) - except (KeyError, IndexError, ValueError) as e: - logger.error(e) - while True: - chunk = response.read() - if not chunk: - break - downloaded += len(chunk) - yield chunk - return # pylint: disable=R1711 - - -@lru_cache() -def filesize(url): - """Fetch size in bytes of file at given URL - - :param str url: The URL to get the size of - :returns: int: size in bytes of remote file - """ - return int(head(url)["content-length"]) - - -@lru_cache() -def seq_filesize(url): - """Fetch size in bytes of file at given URL from sequential requests - - :param str url: The URL to get the size of - :returns: int: size in bytes of remote file - """ - total_filesize = 0 - # YouTube expects a request sequence number as part of the parameters. - split_url = parse.urlsplit(url) - base_url = '%s://%s/%s?' % (split_url.scheme, split_url.netloc, split_url.path) - querys = dict(parse.parse_qsl(split_url.query)) - - # The 0th sequential request provides the file headers, which tell us - # information about how the file is segmented. - querys['sq'] = 0 - url = base_url + parse.urlencode(querys) - response = _execute_request( - url, method="GET" - ) - - response_value = response.read() - # The file header must be added to the total filesize - total_filesize += len(response_value) - - # We can then parse the header to find the number of segments - segment_count = 0 - stream_info = response_value.split(b'\r\n') - segment_regex = b'Segment-Count: (\\d+)' - for line in stream_info: - # One of the lines should contain the segment count, but we don't know - # which, so we need to iterate through the lines to find it - try: - segment_count = int(regex_search(segment_regex, line, 1)) - except RegexMatchError: - pass - - if segment_count == 0: - raise RegexMatchError('seq_filesize', segment_regex) - - # We make HEAD requests to the segments sequentially to find the total filesize. - seq_num = 1 - while seq_num <= segment_count: - # Create sequential request URL - querys['sq'] = seq_num - url = base_url + parse.urlencode(querys) - - total_filesize += int(head(url)['content-length']) - seq_num += 1 - return total_filesize - - -def head(url): - """Fetch headers returned http GET request. - - :param str url: - The URL to perform the GET request for. - :rtype: dict - :returns: - dictionary of lowercase headers - """ - response_headers = _execute_request(url, method="HEAD").info() - return {k.lower(): v for k, v in response_headers.items()} diff --git a/lib/pytube/pytube/streams.py b/lib/pytube/pytube/streams.py deleted file mode 100644 index 179c1ae..0000000 --- a/lib/pytube/pytube/streams.py +++ /dev/null @@ -1,436 +0,0 @@ -""" -This module contains a container for stream manifest data. - -A container object for the media stream (video only / audio only / video+audio -combined). This was referred to as ``Video`` in the legacy pytube version, but -has been renamed to accommodate DASH (which serves the audio and video -separately). -""" -import logging -import os -from math import ceil - -from datetime import datetime -from typing import BinaryIO, Dict, Optional, Tuple -from urllib.error import HTTPError -from urllib.parse import parse_qs - -from pytube import extract, request -from pytube.helpers import safe_filename, target_directory -from pytube.itags import get_format_profile -from pytube.monostate import Monostate - -logger = logging.getLogger(__name__) - - -class Stream: - """Container for stream manifest data.""" - - def __init__( - self, stream: Dict, monostate: Monostate - ): - """Construct a :class:`Stream `. - - :param dict stream: - The unscrambled data extracted from YouTube. - :param dict monostate: - Dictionary of data shared across all instances of - :class:`Stream `. - """ - # A dictionary shared between all instances of :class:`Stream ` - # (Borg pattern). - self._monostate = monostate - - self.url = stream["url"] # signed download url - self.itag = int( - stream["itag"] - ) # stream format id (youtube nomenclature) - - # set type and codec info - - # 'video/webm; codecs="vp8, vorbis"' -> 'video/webm', ['vp8', 'vorbis'] - self.mime_type, self.codecs = extract.mime_type_codec(stream["mimeType"]) - - # 'video/webm' -> 'video', 'webm' - self.type, self.subtype = self.mime_type.split("/") - - # ['vp8', 'vorbis'] -> video_codec: vp8, audio_codec: vorbis. DASH - # streams return NoneType for audio/video depending. - self.video_codec, self.audio_codec = self.parse_codecs() - - self.is_otf: bool = stream["is_otf"] - self.bitrate: Optional[int] = stream["bitrate"] - - # filesize in bytes - self._filesize: Optional[int] = int(stream.get('contentLength', 0)) - - # filesize in kilobytes - self._filesize_kb: Optional[float] = float(ceil(float(stream.get('contentLength', 0)) / 1024 * 1000) / 1000) - - # filesize in megabytes - self._filesize_mb: Optional[float] = float(ceil(float(stream.get('contentLength', 0)) / 1024 / 1024 * 1000) / 1000) - - # filesize in gigabytes(fingers crossed we don't need terabytes going forward though) - self._filesize_gb: Optional[float] = float(ceil(float(stream.get('contentLength', 0)) / 1024 / 1024 / 1024 * 1000) / 1000) - - # Additional information about the stream format, such as resolution, - # frame rate, and whether the stream is live (HLS) or 3D. - itag_profile = get_format_profile(self.itag) - self.is_dash = itag_profile["is_dash"] - self.abr = itag_profile["abr"] # average bitrate (audio streams only) - if 'fps' in stream: - self.fps = stream['fps'] # Video streams only - self.resolution = itag_profile[ - "resolution" - ] # resolution (e.g.: "480p") - self.is_3d = itag_profile["is_3d"] - self.is_hdr = itag_profile["is_hdr"] - self.is_live = itag_profile["is_live"] - - @property - def is_adaptive(self) -> bool: - """Whether the stream is DASH. - - :rtype: bool - """ - # if codecs has two elements (e.g.: ['vp8', 'vorbis']): 2 % 2 = 0 - # if codecs has one element (e.g.: ['vp8']) 1 % 2 = 1 - return bool(len(self.codecs) % 2) - - @property - def is_progressive(self) -> bool: - """Whether the stream is progressive. - - :rtype: bool - """ - return not self.is_adaptive - - @property - def includes_audio_track(self) -> bool: - """Whether the stream only contains audio. - - :rtype: bool - """ - return self.is_progressive or self.type == "audio" - - @property - def includes_video_track(self) -> bool: - """Whether the stream only contains video. - - :rtype: bool - """ - return self.is_progressive or self.type == "video" - - def parse_codecs(self) -> Tuple[Optional[str], Optional[str]]: - """Get the video/audio codecs from list of codecs. - - Parse a variable length sized list of codecs and returns a - constant two element tuple, with the video codec as the first element - and audio as the second. Returns None if one is not available - (adaptive only). - - :rtype: tuple - :returns: - A two element tuple with audio and video codecs. - - """ - video = None - audio = None - if not self.is_adaptive: - video, audio = self.codecs - elif self.includes_video_track: - video = self.codecs[0] - elif self.includes_audio_track: - audio = self.codecs[0] - return video, audio - - @property - def filesize(self) -> int: - """File size of the media stream in bytes. - - :rtype: int - :returns: - Filesize (in bytes) of the stream. - """ - if self._filesize == 0: - try: - self._filesize = request.filesize(self.url) - except HTTPError as e: - if e.code != 404: - raise - self._filesize = request.seq_filesize(self.url) - return self._filesize - - @property - def filesize_kb(self) -> float: - """File size of the media stream in kilobytes. - - :rtype: float - :returns: - Rounded filesize (in kilobytes) of the stream. - """ - if self._filesize_kb == 0: - try: - self._filesize_kb = float(ceil(request.filesize(self.url)/1024 * 1000) / 1000) - except HTTPError as e: - if e.code != 404: - raise - self._filesize_kb = float(ceil(request.seq_filesize(self.url)/1024 * 1000) / 1000) - return self._filesize_kb - - @property - def filesize_mb(self) -> float: - """File size of the media stream in megabytes. - - :rtype: float - :returns: - Rounded filesize (in megabytes) of the stream. - """ - if self._filesize_mb == 0: - try: - self._filesize_mb = float(ceil(request.filesize(self.url)/1024/1024 * 1000) / 1000) - except HTTPError as e: - if e.code != 404: - raise - self._filesize_mb = float(ceil(request.seq_filesize(self.url)/1024/1024 * 1000) / 1000) - return self._filesize_mb - - @property - def filesize_gb(self) -> float: - """File size of the media stream in gigabytes. - - :rtype: float - :returns: - Rounded filesize (in gigabytes) of the stream. - """ - if self._filesize_gb == 0: - try: - self._filesize_gb = float(ceil(request.filesize(self.url)/1024/1024/1024 * 1000) / 1000) - except HTTPError as e: - if e.code != 404: - raise - self._filesize_gb = float(ceil(request.seq_filesize(self.url)/1024/1024/1024 * 1000) / 1000) - return self._filesize_gb - - @property - def title(self) -> str: - """Get title of video - - :rtype: str - :returns: - Youtube video title - """ - return self._monostate.title or "Unknown YouTube Video Title" - - @property - def filesize_approx(self) -> int: - """Get approximate filesize of the video - - Falls back to HTTP call if there is not sufficient information to approximate - - :rtype: int - :returns: size of video in bytes - """ - if self._monostate.duration and self.bitrate: - bits_in_byte = 8 - return int( - (self._monostate.duration * self.bitrate) / bits_in_byte - ) - - return self.filesize - - @property - def expiration(self) -> datetime: - expire = parse_qs(self.url.split("?")[1])["expire"][0] - return datetime.utcfromtimestamp(int(expire)) - - @property - def default_filename(self) -> str: - """Generate filename based on the video title. - - :rtype: str - :returns: - An os file system compatible filename. - """ - filename = safe_filename(self.title) - return f"{filename}.{self.subtype}" - - def download( - self, - output_path: Optional[str] = None, - filename: Optional[str] = None, - filename_prefix: Optional[str] = None, - skip_existing: bool = True, - timeout: Optional[int] = None, - max_retries: Optional[int] = 0 - ) -> str: - """Write the media stream to disk. - - :param output_path: - (optional) Output path for writing media file. If one is not - specified, defaults to the current working directory. - :type output_path: str or None - :param filename: - (optional) Output filename (stem only) for writing media file. - If one is not specified, the default filename is used. - :type filename: str or None - :param filename_prefix: - (optional) A string that will be prepended to the filename. - For example a number in a playlist or the name of a series. - If one is not specified, nothing will be prepended - This is separate from filename so you can use the default - filename but still add a prefix. - :type filename_prefix: str or None - :param skip_existing: - (optional) Skip existing files, defaults to True - :type skip_existing: bool - :param timeout: - (optional) Request timeout length in seconds. Uses system default. - :type timeout: int - :param max_retries: - (optional) Number of retries to attempt after socket timeout. Defaults to 0. - :type max_retries: int - :returns: - Path to the saved video - :rtype: str - - """ - file_path = self.get_file_path( - filename=filename, - output_path=output_path, - filename_prefix=filename_prefix, - ) - - if skip_existing and self.exists_at_path(file_path): - logger.debug(f'file {file_path} already exists, skipping') - self.on_complete(file_path) - return file_path - - bytes_remaining = self.filesize - logger.debug(f'downloading ({self.filesize} total bytes) file to {file_path}') - - with open(file_path, "wb") as fh: - try: - for chunk in request.stream( - self.url, - timeout=timeout, - max_retries=max_retries - ): - # reduce the (bytes) remainder by the length of the chunk. - bytes_remaining -= len(chunk) - # send to the on_progress callback. - self.on_progress(chunk, fh, bytes_remaining) - except HTTPError as e: - if e.code != 404: - raise - # Some adaptive streams need to be requested with sequence numbers - for chunk in request.seq_stream( - self.url, - timeout=timeout, - max_retries=max_retries - ): - # reduce the (bytes) remainder by the length of the chunk. - bytes_remaining -= len(chunk) - # send to the on_progress callback. - self.on_progress(chunk, fh, bytes_remaining) - self.on_complete(file_path) - return file_path - - def get_file_path( - self, - filename: Optional[str] = None, - output_path: Optional[str] = None, - filename_prefix: Optional[str] = None, - ) -> str: - if not filename: - filename = self.default_filename - if filename_prefix: - filename = f"{filename_prefix}{filename}" - return os.path.join(target_directory(output_path), filename) - - def exists_at_path(self, file_path: str) -> bool: - return ( - os.path.isfile(file_path) - and os.path.getsize(file_path) == self.filesize - ) - - def stream_to_buffer(self, buffer: BinaryIO) -> None: - """Write the media stream to buffer - - :rtype: io.BytesIO buffer - """ - bytes_remaining = self.filesize - logger.info( - "downloading (%s total bytes) file to buffer", self.filesize, - ) - - for chunk in request.stream(self.url): - # reduce the (bytes) remainder by the length of the chunk. - bytes_remaining -= len(chunk) - # send to the on_progress callback. - self.on_progress(chunk, buffer, bytes_remaining) - self.on_complete(None) - - def on_progress( - self, chunk: bytes, file_handler: BinaryIO, bytes_remaining: int - ): - """On progress callback function. - - This function writes the binary data to the file, then checks if an - additional callback is defined in the monostate. This is exposed to - allow things like displaying a progress bar. - - :param bytes chunk: - Segment of media file binary data, not yet written to disk. - :param file_handler: - The file handle where the media is being written to. - :type file_handler: - :py:class:`io.BufferedWriter` - :param int bytes_remaining: - The delta between the total file size in bytes and amount already - downloaded. - - :rtype: None - - """ - file_handler.write(chunk) - logger.debug("download remaining: %s", bytes_remaining) - if self._monostate.on_progress: - self._monostate.on_progress(self, chunk, bytes_remaining) - - def on_complete(self, file_path: Optional[str]): - """On download complete handler function. - - :param file_path: - The file handle where the media is being written to. - :type file_path: str - - :rtype: None - - """ - logger.debug("download finished") - on_complete = self._monostate.on_complete - if on_complete: - logger.debug("calling on_complete callback %s", on_complete) - on_complete(self, file_path) - - def __repr__(self) -> str: - """Printable object representation. - - :rtype: str - :returns: - A string representation of a :class:`Stream ` object. - """ - parts = ['itag="{s.itag}"', 'mime_type="{s.mime_type}"'] - if self.includes_video_track: - parts.extend(['res="{s.resolution}"', 'fps="{s.fps}fps"']) - if not self.is_adaptive: - parts.extend( - ['vcodec="{s.video_codec}"', 'acodec="{s.audio_codec}"',] - ) - else: - parts.extend(['vcodec="{s.video_codec}"']) - else: - parts.extend(['abr="{s.abr}"', 'acodec="{s.audio_codec}"']) - parts.extend(['progressive="{s.is_progressive}"', 'type="{s.type}"']) - return f"" diff --git a/lib/pytube/pytube/version.py b/lib/pytube/pytube/version.py deleted file mode 100644 index e2a3cbc..0000000 --- a/lib/pytube/pytube/version.py +++ /dev/null @@ -1,4 +0,0 @@ -__version__ = "15.0.0" - -if __name__ == "__main__": - print(__version__) diff --git a/pyproject.toml b/pyproject.toml index 1c73e9a..c2833fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,6 @@ python = "^3.11" aiogram = "^3.1.1" rich = "^13.6.0" shazamio = { path = "lib/ShazamIO" } -pytube = { path = "lib/pytube" } sqlitedict = "^2.1.0" spotipy = "^2.23.0" attrs = "^23.1.0" @@ -23,6 +22,7 @@ nest-asyncio = "^1.5.8" icecream = "^2.1.3" m3u8 = "^5.1.0" cryptography = "^43.0.0" +pytubefix = "^8.2.0" [build-system]