diff --git a/bot/modules/youtube/__init__.py b/bot/modules/youtube/__init__.py
index 47e992e..f3907c3 100644
--- a/bot/modules/youtube/__init__.py
+++ b/bot/modules/youtube/__init__.py
@@ -1,8 +1,8 @@
from .youtube import YouTube
-from pytube.exceptions import AgeRestrictedError
+from pytubefix.exceptions import AgeRestrictedError
youtube = YouTube()
-__all__ = ['youtube', 'AgeRestrictedError']
+__all__ = ["youtube", "AgeRestrictedError"]
diff --git a/bot/modules/youtube/downloader.py b/bot/modules/youtube/downloader.py
index bfd3ce6..964a529 100644
--- a/bot/modules/youtube/downloader.py
+++ b/bot/modules/youtube/downloader.py
@@ -1,5 +1,5 @@
from attrs import define
-from pytube import YouTube, Stream
+from pytubefix import YouTube, Stream
from pydub import AudioSegment
from io import BytesIO
@@ -16,12 +16,7 @@ class YouTubeBytestream:
duration: int
@classmethod
- def from_bytestream(
- cls,
- bytestream: BytesIO,
- filename: str,
- duration: float
- ):
+ def from_bytestream(cls, bytestream: BytesIO, filename: str, duration: float):
bytestream.seek(0)
return cls(
file=bytestream.read(),
@@ -30,11 +25,9 @@ class YouTubeBytestream:
)
def __rerender(self):
- segment = AudioSegment.from_file(
- file=BytesIO(self.file)
- )
+ segment = AudioSegment.from_file(file=BytesIO(self.file))
- self.file = segment.export(BytesIO(), format='mp3', codec='libmp3lame').read()
+ self.file = segment.export(BytesIO(), format="mp3", codec="libmp3lame").read()
return self
async def rerender(self):
@@ -54,13 +47,18 @@ class Downloader:
def from_id(cls, yt_id: str):
video = YouTube.from_id(yt_id)
- audio_stream = video.streams.filter(
- only_audio=True,
- ).order_by('abr').desc().first()
+ audio_stream = (
+ video.streams.filter(
+ only_audio=True,
+ )
+ .order_by("abr")
+ .desc()
+ .first()
+ )
return cls(
audio_stream=audio_stream,
- filename=f'{audio_stream.default_filename}.mp3',
+ filename=f"{audio_stream.default_filename}.mp3",
duration=int(video.length),
)
diff --git a/lib/pytube/README.md b/lib/pytube/README.md
deleted file mode 100644
index 6a6e3f2..0000000
--- a/lib/pytube/README.md
+++ /dev/null
@@ -1,93 +0,0 @@
-
-
-### Actively soliciting contributors!
-
-Have ideas for how pytube can be improved? Feel free to open an issue or a pull request!
-
-# pytube
-
-*pytube* is a genuine, lightweight, dependency-free Python library (and command-line utility) for downloading YouTube videos.
-
-## Documentation
-
-Detailed documentation about the usage of the library can be found at [pytube.io](https://pytube.io). This is recommended for most cases. If you want to hastily download a single video, the [quick start](#Quickstart) guide below might be what you're looking for.
-
-## Description
-
-YouTube is the most popular video-sharing platform in the world and as a hacker, you may encounter a situation where you want to script something to download videos. For this, I present to you: *pytube*.
-
-*pytube* is a lightweight library written in Python. It has no third-party
-dependencies and aims to be highly reliable.
-
-*pytube* also makes pipelining easy, allowing you to specify callback functions for different download events, such as ``on progress`` or ``on complete``.
-
-Furthermore, *pytube* includes a command-line utility, allowing you to download videos right from the terminal.
-
-## Features
-
-- Support for both progressive & DASH streams
-- Support for downloading the complete playlist
-- Easily register ``on_download_progress`` & ``on_download_complete`` callbacks
-- Command-line interfaced included
-- Caption track support
-- Outputs caption tracks to .srt format (SubRip Subtitle)
-- Ability to capture thumbnail URL
-- Extensively documented source code
-- No third-party dependencies
-
-## Quickstart
-
-This guide covers the most basic usage of the library. For more detailed information, please refer to [pytube.io](https://pytube.io).
-
-### Installation
-
-Pytube requires an installation of Python 3.6 or greater, as well as pip. (Pip is typically bundled with Python [installations](https://python.org/downloads).)
-
-To install from PyPI with pip:
-
-```bash
-$ python -m pip install pytube
-```
-
-Sometimes, the PyPI release becomes slightly outdated. To install from the source with pip:
-
-```bash
-$ python -m pip install git+https://github.com/pytube/pytube
-```
-
-### Using pytube in a Python script
-
-To download a video using the library in a script, you'll need to import the YouTube class from the library and pass an argument of the video URL. From there, you can access the streams and download them.
-
-```python
- >>> from pytube import YouTube
- >>> YouTube('https://youtu.be/2lAe1cqCOXo').streams.first().download()
- >>> yt = YouTube('http://youtube.com/watch?v=2lAe1cqCOXo')
- >>> yt.streams
- ... .filter(progressive=True, file_extension='mp4')
- ... .order_by('resolution')
- ... .desc()
- ... .first()
- ... .download()
-```
-
-### Using the command-line interface
-
-Using the CLI is remarkably straightforward as well. To download a video at the highest progressive quality, you can use the following command:
-```bash
-$ pytube https://youtube.com/watch?v=2lAe1cqCOXo
-```
-
-You can also do the same for a playlist:
-```bash
-$ pytube https://www.youtube.com/playlist?list=PLS1QulWo1RIaJECMeUT4LFwJ-ghgoSH6n
-```
diff --git a/lib/pytube/pyproject.toml b/lib/pytube/pyproject.toml
deleted file mode 100644
index c831a56..0000000
--- a/lib/pytube/pyproject.toml
+++ /dev/null
@@ -1,17 +0,0 @@
-[tool.poetry]
-name = "pytube"
-version = "15.0.1"
-description = "Python 3 library for downloading YouTube Videos."
-authors = ["Ronnie Ghose", "Taylor Fox Dahlin", "Nick Ficano"]
-license = "The Unlicense (Unlicense)"
-keywords = ["youtube", "download", "video", "stream",]
-readme = "README.md"
-homepage = "https://pytube.io"
-repository = "https://github.com/pytube/pytube"
-
-[tool.poetry.dependencies]
-python = ">=3.7"
-
-[build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"
diff --git a/lib/pytube/pytube/__init__.py b/lib/pytube/pytube/__init__.py
deleted file mode 100644
index 4eaa1b2..0000000
--- a/lib/pytube/pytube/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# flake8: noqa: F401
-# noreorder
-"""
-Pytube: a very serious Python library for downloading YouTube Videos.
-"""
-__title__ = "pytube"
-__author__ = "Ronnie Ghose, Taylor Fox Dahlin, Nick Ficano"
-__license__ = "The Unlicense (Unlicense)"
-__js__ = None
-__js_url__ = None
-
-from pytube.version import __version__
-from pytube.streams import Stream
-from pytube.captions import Caption
-from pytube.query import CaptionQuery, StreamQuery
-from pytube.__main__ import YouTube
-from pytube.contrib.playlist import Playlist
-from pytube.contrib.channel import Channel
-from pytube.contrib.search import Search
diff --git a/lib/pytube/pytube/__main__.py b/lib/pytube/pytube/__main__.py
deleted file mode 100644
index 60451d3..0000000
--- a/lib/pytube/pytube/__main__.py
+++ /dev/null
@@ -1,479 +0,0 @@
-"""
-This module implements the core developer interface for pytube.
-
-The problem domain of the :class:`YouTube class focuses almost
-exclusively on the developer interface. Pytube offloads the heavy lifting to
-smaller peripheral modules and functions.
-
-"""
-import logging
-from typing import Any, Callable, Dict, List, Optional
-
-import pytube
-import pytube.exceptions as exceptions
-from pytube import extract, request
-from pytube import Stream, StreamQuery
-from pytube.helpers import install_proxy
-from pytube.innertube import InnerTube
-from pytube.metadata import YouTubeMetadata
-from pytube.monostate import Monostate
-
-logger = logging.getLogger(__name__)
-
-
-class YouTube:
- """Core developer interface for pytube."""
-
- def __init__(
- self,
- url: str,
- on_progress_callback: Optional[Callable[[Any, bytes, int], None]] = None,
- on_complete_callback: Optional[Callable[[Any, Optional[str]], None]] = None,
- proxies: Dict[str, str] = None,
- use_oauth: bool = False,
- allow_oauth_cache: bool = True
- ):
- """Construct a :class:`YouTube `.
-
- :param str url:
- A valid YouTube watch URL.
- :param func on_progress_callback:
- (Optional) User defined callback function for stream download
- progress events.
- :param func on_complete_callback:
- (Optional) User defined callback function for stream download
- complete events.
- :param dict proxies:
- (Optional) A dict mapping protocol to proxy address which will be used by pytube.
- :param bool use_oauth:
- (Optional) Prompt the user to authenticate to YouTube.
- If allow_oauth_cache is set to True, the user should only be prompted once.
- :param bool allow_oauth_cache:
- (Optional) Cache OAuth tokens locally on the machine. Defaults to True.
- These tokens are only generated if use_oauth is set to True as well.
- """
- self._js: Optional[str] = None # js fetched by js_url
- self._js_url: Optional[str] = None # the url to the js, parsed from watch html
-
- self._vid_info: Optional[Dict] = None # content fetched from innertube/player
-
- self._watch_html: Optional[str] = None # the html of /watch?v=
- self._embed_html: Optional[str] = None
- self._player_config_args: Optional[Dict] = None # inline js in the html containing
- self._age_restricted: Optional[bool] = None
-
- self._fmt_streams: Optional[List[Stream]] = None
-
- self._initial_data = None
- self._metadata: Optional[YouTubeMetadata] = None
-
- # video_id part of /watch?v=
- self.video_id = extract.video_id(url)
-
- self.watch_url = f"https://youtube.com/watch?v={self.video_id}"
- self.embed_url = f"https://www.youtube.com/embed/{self.video_id}"
-
- # Shared between all instances of `Stream` (Borg pattern).
- self.stream_monostate = Monostate(
- on_progress=on_progress_callback, on_complete=on_complete_callback
- )
-
- if proxies:
- install_proxy(proxies)
-
- self._author = None
- self._title = None
- self._publish_date = None
-
- self.use_oauth = use_oauth
- self.allow_oauth_cache = allow_oauth_cache
-
- def __repr__(self):
- return f''
-
- def __eq__(self, o: object) -> bool:
- # Compare types and urls, if they're same return true, else return false.
- return type(o) == type(self) and o.watch_url == self.watch_url
-
- @property
- def watch_html(self):
- if self._watch_html:
- return self._watch_html
- self._watch_html = request.get(url=self.watch_url)
- return self._watch_html
-
- @property
- def embed_html(self):
- if self._embed_html:
- return self._embed_html
- self._embed_html = request.get(url=self.embed_url)
- return self._embed_html
-
- @property
- def age_restricted(self):
- if self._age_restricted:
- return self._age_restricted
- self._age_restricted = extract.is_age_restricted(self.watch_html)
- return self._age_restricted
-
- @property
- def js_url(self):
- if self._js_url:
- return self._js_url
-
- if self.age_restricted:
- self._js_url = extract.js_url(self.embed_html)
- else:
- self._js_url = extract.js_url(self.watch_html)
-
- return self._js_url
-
- @property
- def js(self):
- if self._js:
- return self._js
-
- # If the js_url doesn't match the cached url, fetch the new js and update
- # the cache; otherwise, load the cache.
- if pytube.__js_url__ != self.js_url:
- self._js = request.get(self.js_url)
- pytube.__js__ = self._js
- pytube.__js_url__ = self.js_url
- else:
- self._js = pytube.__js__
-
- return self._js
-
- @property
- def initial_data(self):
- if self._initial_data:
- return self._initial_data
- self._initial_data = extract.initial_data(self.watch_html)
- return self._initial_data
-
- @property
- def streaming_data(self):
- """Return streamingData from video info."""
- if 'streamingData' in self.vid_info:
- return self.vid_info['streamingData']
- else:
- self.bypass_age_gate()
- return self.vid_info['streamingData']
-
- @property
- def fmt_streams(self):
- """Returns a list of streams if they have been initialized.
-
- If the streams have not been initialized, finds all relevant
- streams and initializes them.
- """
- self.check_availability()
- if self._fmt_streams:
- return self._fmt_streams
-
- self._fmt_streams = []
-
- stream_manifest = extract.apply_descrambler(self.streaming_data)
-
- # If the cached js doesn't work, try fetching a new js file
- # https://github.com/pytube/pytube/issues/1054
- try:
- extract.apply_signature(stream_manifest, self.vid_info, self.js)
- except exceptions.ExtractError:
- # To force an update to the js file, we clear the cache and retry
- self._js = None
- self._js_url = None
- pytube.__js__ = None
- pytube.__js_url__ = None
- extract.apply_signature(stream_manifest, self.vid_info, self.js)
-
- # build instances of :class:`Stream `
- # Initialize stream objects
- for stream in stream_manifest:
- video = Stream(
- stream=stream,
- monostate=self.stream_monostate,
- )
- self._fmt_streams.append(video)
-
- self.stream_monostate.title = self.title
- self.stream_monostate.duration = self.length
-
- return self._fmt_streams
-
- def check_availability(self):
- """Check whether the video is available.
-
- Raises different exceptions based on why the video is unavailable,
- otherwise does nothing.
- """
- status, messages = extract.playability_status(self.watch_html)
-
- for reason in messages:
- if status == 'UNPLAYABLE':
- if reason == (
- 'Join this channel to get access to members-only content '
- 'like this video, and other exclusive perks.'
- ):
- raise exceptions.MembersOnly(video_id=self.video_id)
- elif reason == 'This live stream recording is not available.':
- raise exceptions.RecordingUnavailable(video_id=self.video_id)
- else:
- raise exceptions.VideoUnavailable(video_id=self.video_id)
- elif status == 'LOGIN_REQUIRED':
- if reason == (
- 'This is a private video. '
- 'Please sign in to verify that you may see it.'
- ):
- raise exceptions.VideoPrivate(video_id=self.video_id)
- elif status == 'ERROR':
- if reason == 'Video unavailable':
- raise exceptions.VideoUnavailable(video_id=self.video_id)
- elif status == 'LIVE_STREAM':
- raise exceptions.LiveStreamError(video_id=self.video_id)
-
- @property
- def vid_info(self):
- """Parse the raw vid info and return the parsed result.
-
- :rtype: Dict[Any, Any]
- """
- if self._vid_info:
- return self._vid_info
-
- innertube = InnerTube(use_oauth=self.use_oauth, allow_cache=self.allow_oauth_cache)
-
- innertube_response = innertube.player(self.video_id)
- self._vid_info = innertube_response
- return self._vid_info
-
- def bypass_age_gate(self):
- """Attempt to update the vid_info by bypassing the age gate."""
- innertube = InnerTube(
- client='ANDROID_EMBED',
- use_oauth=self.use_oauth,
- allow_cache=self.allow_oauth_cache
- )
- innertube_response = innertube.player(self.video_id)
-
- playability_status = innertube_response['playabilityStatus'].get('status', None)
-
- # If we still can't access the video, raise an exception
- # (tier 3 age restriction)
- if playability_status == 'UNPLAYABLE':
- raise exceptions.AgeRestrictedError(self.video_id)
-
- self._vid_info = innertube_response
-
- @property
- def caption_tracks(self) -> List[pytube.Caption]:
- """Get a list of :class:`Caption `.
-
- :rtype: List[Caption]
- """
- raw_tracks = (
- self.vid_info.get("captions", {})
- .get("playerCaptionsTracklistRenderer", {})
- .get("captionTracks", [])
- )
- return [pytube.Caption(track) for track in raw_tracks]
-
- @property
- def captions(self) -> pytube.CaptionQuery:
- """Interface to query caption tracks.
-
- :rtype: :class:`CaptionQuery `.
- """
- return pytube.CaptionQuery(self.caption_tracks)
-
- @property
- def streams(self) -> StreamQuery:
- """Interface to query both adaptive (DASH) and progressive streams.
-
- :rtype: :class:`StreamQuery `.
- """
- self.check_availability()
- return StreamQuery(self.fmt_streams)
-
- @property
- def thumbnail_url(self) -> str:
- """Get the thumbnail url image.
-
- :rtype: str
- """
- thumbnail_details = (
- self.vid_info.get("videoDetails", {})
- .get("thumbnail", {})
- .get("thumbnails")
- )
- if thumbnail_details:
- thumbnail_details = thumbnail_details[-1] # last item has max size
- return thumbnail_details["url"]
-
- return f"https://img.youtube.com/vi/{self.video_id}/maxresdefault.jpg"
-
- @property
- def publish_date(self):
- """Get the publish date.
-
- :rtype: datetime
- """
- if self._publish_date:
- return self._publish_date
- self._publish_date = extract.publish_date(self.watch_html)
- return self._publish_date
-
- @publish_date.setter
- def publish_date(self, value):
- """Sets the publish date."""
- self._publish_date = value
-
- @property
- def title(self) -> str:
- """Get the video title.
-
- :rtype: str
- """
- if self._title:
- return self._title
-
- try:
- self._title = self.vid_info['videoDetails']['title']
- except KeyError:
- # Check_availability will raise the correct exception in most cases
- # if it doesn't, ask for a report.
- self.check_availability()
- raise exceptions.PytubeError(
- (
- f'Exception while accessing title of {self.watch_url}. '
- 'Please file a bug report at https://github.com/pytube/pytube'
- )
- )
-
- return self._title
-
- @title.setter
- def title(self, value):
- """Sets the title value."""
- self._title = value
-
- @property
- def description(self) -> str:
- """Get the video description.
-
- :rtype: str
- """
- return self.vid_info.get("videoDetails", {}).get("shortDescription")
-
- @property
- def rating(self) -> float:
- """Get the video average rating.
-
- :rtype: float
-
- """
- return self.vid_info.get("videoDetails", {}).get("averageRating")
-
- @property
- def length(self) -> int:
- """Get the video length in seconds.
-
- :rtype: int
- """
- return int(self.vid_info.get('videoDetails', {}).get('lengthSeconds'))
-
- @property
- def views(self) -> int:
- """Get the number of the times the video has been viewed.
-
- :rtype: int
- """
- return int(self.vid_info.get("videoDetails", {}).get("viewCount"))
-
- @property
- def author(self) -> str:
- """Get the video author.
- :rtype: str
- """
- if self._author:
- return self._author
- self._author = self.vid_info.get("videoDetails", {}).get(
- "author", "unknown"
- )
- return self._author
-
- @author.setter
- def author(self, value):
- """Set the video author."""
- self._author = value
-
- @property
- def keywords(self) -> List[str]:
- """Get the video keywords.
-
- :rtype: List[str]
- """
- return self.vid_info.get('videoDetails', {}).get('keywords', [])
-
- @property
- def channel_id(self) -> str:
- """Get the video poster's channel id.
-
- :rtype: str
- """
- return self.vid_info.get('videoDetails', {}).get('channelId', None)
-
- @property
- def channel_url(self) -> str:
- """Construct the channel url for the video's poster from the channel id.
-
- :rtype: str
- """
- return f'https://www.youtube.com/channel/{self.channel_id}'
-
- @property
- def metadata(self) -> Optional[YouTubeMetadata]:
- """Get the metadata for the video.
-
- :rtype: YouTubeMetadata
- """
- if self._metadata:
- return self._metadata
- else:
- self._metadata = extract.metadata(self.initial_data)
- return self._metadata
-
- def register_on_progress_callback(self, func: Callable[[Any, bytes, int], None]):
- """Register a download progress callback function post initialization.
-
- :param callable func:
- A callback function that takes ``stream``, ``chunk``,
- and ``bytes_remaining`` as parameters.
-
- :rtype: None
-
- """
- self.stream_monostate.on_progress = func
-
- def register_on_complete_callback(self, func: Callable[[Any, Optional[str]], None]):
- """Register a download complete callback function post initialization.
-
- :param callable func:
- A callback function that takes ``stream`` and ``file_path``.
-
- :rtype: None
-
- """
- self.stream_monostate.on_complete = func
-
- @staticmethod
- def from_id(video_id: str) -> "YouTube":
- """Construct a :class:`YouTube ` object from a video id.
-
- :param str video_id:
- The video id of the YouTube video.
-
- :rtype: :class:`YouTube `
-
- """
- return YouTube(f"https://www.youtube.com/watch?v={video_id}")
diff --git a/lib/pytube/pytube/captions.py b/lib/pytube/pytube/captions.py
deleted file mode 100644
index fe84bec..0000000
--- a/lib/pytube/pytube/captions.py
+++ /dev/null
@@ -1,164 +0,0 @@
-import math
-import os
-import time
-import json
-import xml.etree.ElementTree as ElementTree
-from html import unescape
-from typing import Dict, Optional
-
-from pytube import request
-from pytube.helpers import safe_filename, target_directory
-
-
-class Caption:
- """Container for caption tracks."""
-
- def __init__(self, caption_track: Dict):
- """Construct a :class:`Caption `.
-
- :param dict caption_track:
- Caption track data extracted from ``watch_html``.
- """
- self.url = caption_track.get("baseUrl")
-
- # Certain videos have runs instead of simpleText
- # this handles that edge case
- name_dict = caption_track['name']
- if 'simpleText' in name_dict:
- self.name = name_dict['simpleText']
- else:
- for el in name_dict['runs']:
- if 'text' in el:
- self.name = el['text']
-
- # Use "vssId" instead of "languageCode", fix issue #779
- self.code = caption_track["vssId"]
- # Remove preceding '.' for backwards compatibility, e.g.:
- # English -> vssId: .en, languageCode: en
- # English (auto-generated) -> vssId: a.en, languageCode: en
- self.code = self.code.strip('.')
-
- @property
- def xml_captions(self) -> str:
- """Download the xml caption tracks."""
- return request.get(self.url)
-
- @property
- def json_captions(self) -> dict:
- """Download and parse the json caption tracks."""
- json_captions_url = self.url.replace('fmt=srv3','fmt=json3')
- text = request.get(json_captions_url)
- parsed = json.loads(text)
- assert parsed['wireMagic'] == 'pb3', 'Unexpected captions format'
- return parsed
-
- def generate_srt_captions(self) -> str:
- """Generate "SubRip Subtitle" captions.
-
- Takes the xml captions from :meth:`~pytube.Caption.xml_captions` and
- recompiles them into the "SubRip Subtitle" format.
- """
- return self.xml_caption_to_srt(self.xml_captions)
-
- @staticmethod
- def float_to_srt_time_format(d: float) -> str:
- """Convert decimal durations into proper srt format.
-
- :rtype: str
- :returns:
- SubRip Subtitle (str) formatted time duration.
-
- float_to_srt_time_format(3.89) -> '00:00:03,890'
- """
- fraction, whole = math.modf(d)
- time_fmt = time.strftime("%H:%M:%S,", time.gmtime(whole))
- ms = f"{fraction:.3f}".replace("0.", "")
- return time_fmt + ms
-
- def xml_caption_to_srt(self, xml_captions: str) -> str:
- """Convert xml caption tracks to "SubRip Subtitle (srt)".
-
- :param str xml_captions:
- XML formatted caption tracks.
- """
- segments = []
- root = ElementTree.fromstring(xml_captions)
- for i, child in enumerate(list(root)):
- text = child.text or ""
- caption = unescape(text.replace("\n", " ").replace(" ", " "),)
- try:
- duration = float(child.attrib["dur"])
- except KeyError:
- duration = 0.0
- start = float(child.attrib["start"])
- end = start + duration
- sequence_number = i + 1 # convert from 0-indexed to 1.
- line = "{seq}\n{start} --> {end}\n{text}\n".format(
- seq=sequence_number,
- start=self.float_to_srt_time_format(start),
- end=self.float_to_srt_time_format(end),
- text=caption,
- )
- segments.append(line)
- return "\n".join(segments).strip()
-
- def download(
- self,
- title: str,
- srt: bool = True,
- output_path: Optional[str] = None,
- filename_prefix: Optional[str] = None,
- ) -> str:
- """Write the media stream to disk.
-
- :param title:
- Output filename (stem only) for writing media file.
- If one is not specified, the default filename is used.
- :type title: str
- :param srt:
- Set to True to download srt, false to download xml. Defaults to True.
- :type srt bool
- :param output_path:
- (optional) Output path for writing media file. If one is not
- specified, defaults to the current working directory.
- :type output_path: str or None
- :param filename_prefix:
- (optional) A string that will be prepended to the filename.
- For example a number in a playlist or the name of a series.
- If one is not specified, nothing will be prepended
- This is separate from filename so you can use the default
- filename but still add a prefix.
- :type filename_prefix: str or None
-
- :rtype: str
- """
- if title.endswith(".srt") or title.endswith(".xml"):
- filename = ".".join(title.split(".")[:-1])
- else:
- filename = title
-
- if filename_prefix:
- filename = f"{safe_filename(filename_prefix)}{filename}"
-
- filename = safe_filename(filename)
-
- filename += f" ({self.code})"
-
- if srt:
- filename += ".srt"
- else:
- filename += ".xml"
-
- file_path = os.path.join(target_directory(output_path), filename)
-
- with open(file_path, "w", encoding="utf-8") as file_handle:
- if srt:
- file_handle.write(self.generate_srt_captions())
- else:
- file_handle.write(self.xml_captions)
-
- return file_path
-
- def __repr__(self):
- """Printable object representation."""
- return ''.format(s=self)
diff --git a/lib/pytube/pytube/cipher.py b/lib/pytube/pytube/cipher.py
deleted file mode 100644
index 77b0f2a..0000000
--- a/lib/pytube/pytube/cipher.py
+++ /dev/null
@@ -1,697 +0,0 @@
-"""
-This module contains all logic necessary to decipher the signature.
-
-YouTube's strategy to restrict downloading videos is to send a ciphered version
-of the signature to the client, along with the decryption algorithm obfuscated
-in JavaScript. For the clients to play the videos, JavaScript must take the
-ciphered version, cycle it through a series of "transform functions," and then
-signs the media URL with the output.
-
-This module is responsible for (1) finding and extracting those "transform
-functions" (2) maps them to Python equivalents and (3) taking the ciphered
-signature and decoding it.
-
-"""
-import logging
-import re
-from itertools import chain
-from typing import Any, Callable, Dict, List, Optional, Tuple
-
-from pytube.exceptions import ExtractError, RegexMatchError
-from pytube.helpers import cache, regex_search
-from pytube.parser import find_object_from_startpoint, throttling_array_split
-
-logger = logging.getLogger(__name__)
-
-
-class Cipher:
- def __init__(self, js: str):
- self.transform_plan: List[str] = get_transform_plan(js)
- var_regex = re.compile(r"^\w+\W")
- var_match = var_regex.search(self.transform_plan[0])
- if not var_match:
- raise RegexMatchError(
- caller="__init__", pattern=var_regex.pattern
- )
- var = var_match.group(0)[:-1]
- self.transform_map = get_transform_map(js, var)
- self.js_func_patterns = [
- r"\w+\.(\w+)\(\w,(\d+)\)",
- r"\w+\[(\"\w+\")\]\(\w,(\d+)\)"
- ]
-
- self.throttling_plan = get_throttling_plan(js)
- self.throttling_array = get_throttling_function_array(js)
-
- self.calculated_n = None
-
- def calculate_n(self, initial_n: list):
- """Converts n to the correct value to prevent throttling."""
- if self.calculated_n:
- return self.calculated_n
-
- # First, update all instances of 'b' with the list(initial_n)
- for i in range(len(self.throttling_array)):
- if self.throttling_array[i] == 'b':
- self.throttling_array[i] = initial_n
-
- for step in self.throttling_plan:
- curr_func = self.throttling_array[int(step[0])]
- if not callable(curr_func):
- logger.debug(f'{curr_func} is not callable.')
- logger.debug(f'Throttling array:\n{self.throttling_array}\n')
- raise ExtractError(f'{curr_func} is not callable.')
-
- first_arg = self.throttling_array[int(step[1])]
-
- if len(step) == 2:
- curr_func(first_arg)
- elif len(step) == 3:
- second_arg = self.throttling_array[int(step[2])]
- curr_func(first_arg, second_arg)
-
- self.calculated_n = ''.join(initial_n)
- return self.calculated_n
-
- def get_signature(self, ciphered_signature: str) -> str:
- """Decipher the signature.
-
- Taking the ciphered signature, applies the transform functions.
-
- :param str ciphered_signature:
- The ciphered signature sent in the ``player_config``.
- :rtype: str
- :returns:
- Decrypted signature required to download the media content.
- """
- signature = list(ciphered_signature)
-
- for js_func in self.transform_plan:
- name, argument = self.parse_function(js_func) # type: ignore
- signature = self.transform_map[name](signature, argument)
- logger.debug(
- "applied transform function\n"
- "output: %s\n"
- "js_function: %s\n"
- "argument: %d\n"
- "function: %s",
- "".join(signature),
- name,
- argument,
- self.transform_map[name],
- )
-
- return "".join(signature)
-
- @cache
- def parse_function(self, js_func: str) -> Tuple[str, int]:
- """Parse the Javascript transform function.
-
- Break a JavaScript transform function down into a two element ``tuple``
- containing the function name and some integer-based argument.
-
- :param str js_func:
- The JavaScript version of the transform function.
- :rtype: tuple
- :returns:
- two element tuple containing the function name and an argument.
-
- **Example**:
-
- parse_function('DE.AJ(a,15)')
- ('AJ', 15)
-
- """
- logger.debug("parsing transform function")
- for pattern in self.js_func_patterns:
- regex = re.compile(pattern)
- parse_match = regex.search(js_func)
- if parse_match:
- fn_name, fn_arg = parse_match.groups()
- return fn_name, int(fn_arg)
-
- raise RegexMatchError(
- caller="parse_function", pattern="js_func_patterns"
- )
-
-
-def get_initial_function_name(js: str) -> str:
- """Extract the name of the function responsible for computing the signature.
- :param str js:
- The contents of the base.js asset file.
- :rtype: str
- :returns:
- Function name from regex match
- """
-
- function_patterns = [
- r"\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
- r"\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
- r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # noqa: E501
- r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # noqa: E501
- r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r"\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(",
- r"yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
- r"\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
- r"\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
- r"\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
- r"\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
- r"\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
- ]
- logger.debug("finding initial function name")
- for pattern in function_patterns:
- regex = re.compile(pattern)
- function_match = regex.search(js)
- if function_match:
- logger.debug("finished regex search, matched: %s", pattern)
- return function_match.group(1)
-
- raise RegexMatchError(
- caller="get_initial_function_name", pattern="multiple"
- )
-
-
-def get_transform_plan(js: str) -> List[str]:
- """Extract the "transform plan".
-
- The "transform plan" is the functions that the ciphered signature is
- cycled through to obtain the actual signature.
-
- :param str js:
- The contents of the base.js asset file.
-
- **Example**:
-
- ['DE.AJ(a,15)',
- 'DE.VR(a,3)',
- 'DE.AJ(a,51)',
- 'DE.VR(a,3)',
- 'DE.kT(a,51)',
- 'DE.kT(a,8)',
- 'DE.VR(a,3)',
- 'DE.kT(a,21)']
- """
- name = re.escape(get_initial_function_name(js))
- pattern = r"%s=function\(\w\){[a-z=\.\(\"\)]*;(.*);(?:.+)}" % name
- logger.debug("getting transform plan")
- return regex_search(pattern, js, group=1).split(";")
-
-
-def get_transform_object(js: str, var: str) -> List[str]:
- """Extract the "transform object".
-
- The "transform object" contains the function definitions referenced in the
- "transform plan". The ``var`` argument is the obfuscated variable name
- which contains these functions, for example, given the function call
- ``DE.AJ(a,15)`` returned by the transform plan, "DE" would be the var.
-
- :param str js:
- The contents of the base.js asset file.
- :param str var:
- The obfuscated variable name that stores an object with all functions
- that descrambles the signature.
-
- **Example**:
-
- >>> get_transform_object(js, 'DE')
- ['AJ:function(a){a.reverse()}',
- 'VR:function(a,b){a.splice(0,b)}',
- 'kT:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c}']
-
- """
- pattern = r"var %s={(.*?)};" % re.escape(var)
- logger.debug("getting transform object")
- regex = re.compile(pattern, flags=re.DOTALL)
- transform_match = regex.search(js)
- if not transform_match:
- raise RegexMatchError(caller="get_transform_object", pattern=pattern)
-
- return transform_match.group(1).replace("\n", " ").split(", ")
-
-
-def get_transform_map(js: str, var: str) -> Dict:
- """Build a transform function lookup.
-
- Build a lookup table of obfuscated JavaScript function names to the
- Python equivalents.
-
- :param str js:
- The contents of the base.js asset file.
- :param str var:
- The obfuscated variable name that stores an object with all functions
- that descrambles the signature.
-
- """
- transform_object = get_transform_object(js, var)
- mapper = {}
- for obj in transform_object:
- # AJ:function(a){a.reverse()} => AJ, function(a){a.reverse()}
- name, function = obj.split(":", 1)
- fn = map_functions(function)
- mapper[name] = fn
- return mapper
-
-
-def get_throttling_function_name(js: str) -> str:
- """Extract the name of the function that computes the throttling parameter.
-
- :param str js:
- The contents of the base.js asset file.
- :rtype: str
- :returns:
- The name of the function used to compute the throttling parameter.
- """
- function_patterns = [
- # https://github.com/ytdl-org/youtube-dl/issues/29326#issuecomment-865985377
- # https://github.com/yt-dlp/yt-dlp/commit/48416bc4a8f1d5ff07d5977659cb8ece7640dcd8
- # var Bpa = [iha];
- # ...
- # a.C && (b = a.get("n")) && (b = Bpa[0](b), a.set("n", b),
- # Bpa.length || iha("")) }};
- # In the above case, `iha` is the relevant function name
- r'a\.[a-zA-Z]\s*&&\s*\([a-z]\s*=\s*a\.get\("n"\)\)\s*&&.*?\|\|\s*([a-z]+)',
- r'\([a-z]\s*=\s*([a-zA-Z0-9$]+)(\[\d+\])\([a-z]\)',
- ]
- logger.debug('Finding throttling function name')
- for pattern in function_patterns:
- regex = re.compile(pattern)
- function_match = regex.search(js)
- if function_match:
- logger.debug("finished regex search, matched: %s", pattern)
- if len(function_match.groups()) == 1:
- return function_match.group(1)
- idx = function_match.group(2)
- if idx:
- idx = idx.strip("[]")
- array = re.search(
- r'var {nfunc}\s*=\s*(\[.+?\]);'.format(
- nfunc=re.escape(function_match.group(1))),
- js
- )
- if array:
- array = array.group(1).strip("[]").split(",")
- array = [x.strip() for x in array]
- return array[int(idx)]
-
- raise RegexMatchError(
- caller="get_throttling_function_name", pattern="multiple"
- )
-
-
-def get_throttling_function_code(js: str) -> str:
- """Extract the raw code for the throttling function.
-
- :param str js:
- The contents of the base.js asset file.
- :rtype: str
- :returns:
- The name of the function used to compute the throttling parameter.
- """
- # Begin by extracting the correct function name
- name = re.escape(get_throttling_function_name(js))
-
- # Identify where the function is defined
- pattern_start = r"%s=function\(\w\)" % name
- regex = re.compile(pattern_start)
- match = regex.search(js)
-
- # Extract the code within curly braces for the function itself, and merge any split lines
- code_lines_list = find_object_from_startpoint(js, match.span()[1]).split('\n')
- joined_lines = "".join(code_lines_list)
-
- # Prepend function definition (e.g. `Dea=function(a)`)
- return match.group(0) + joined_lines
-
-
-def get_throttling_function_array(js: str) -> List[Any]:
- """Extract the "c" array.
-
- :param str js:
- The contents of the base.js asset file.
- :returns:
- The array of various integers, arrays, and functions.
- """
- raw_code = get_throttling_function_code(js)
-
- array_start = r",c=\["
- array_regex = re.compile(array_start)
- match = array_regex.search(raw_code)
-
- array_raw = find_object_from_startpoint(raw_code, match.span()[1] - 1)
- str_array = throttling_array_split(array_raw)
-
- converted_array = []
- for el in str_array:
- try:
- converted_array.append(int(el))
- continue
- except ValueError:
- # Not an integer value.
- pass
-
- if el == 'null':
- converted_array.append(None)
- continue
-
- if el.startswith('"') and el.endswith('"'):
- # Convert e.g. '"abcdef"' to string without quotation marks, 'abcdef'
- converted_array.append(el[1:-1])
- continue
-
- if el.startswith('function'):
- mapper = (
- (r"{for\(\w=\(\w%\w\.length\+\w\.length\)%\w\.length;\w--;\)\w\.unshift\(\w.pop\(\)\)}", throttling_unshift), # noqa:E501
- (r"{\w\.reverse\(\)}", throttling_reverse),
- (r"{\w\.push\(\w\)}", throttling_push),
- (r";var\s\w=\w\[0\];\w\[0\]=\w\[\w\];\w\[\w\]=\w}", throttling_swap),
- (r"case\s\d+", throttling_cipher_function),
- (r"\w\.splice\(0,1,\w\.splice\(\w,1,\w\[0\]\)\[0\]\)", throttling_nested_splice), # noqa:E501
- (r";\w\.splice\(\w,1\)}", js_splice),
- (r"\w\.splice\(-\w\)\.reverse\(\)\.forEach\(function\(\w\){\w\.unshift\(\w\)}\)", throttling_prepend), # noqa:E501
- (r"for\(var \w=\w\.length;\w;\)\w\.push\(\w\.splice\(--\w,1\)\[0\]\)}", throttling_reverse), # noqa:E501
- )
-
- found = False
- for pattern, fn in mapper:
- if re.search(pattern, el):
- converted_array.append(fn)
- found = True
- if found:
- continue
-
- converted_array.append(el)
-
- # Replace null elements with array itself
- for i in range(len(converted_array)):
- if converted_array[i] is None:
- converted_array[i] = converted_array
-
- return converted_array
-
-
-def get_throttling_plan(js: str):
- """Extract the "throttling plan".
-
- The "throttling plan" is a list of tuples used for calling functions
- in the c array. The first element of the tuple is the index of the
- function to call, and any remaining elements of the tuple are arguments
- to pass to that function.
-
- :param str js:
- The contents of the base.js asset file.
- :returns:
- The full function code for computing the throttlign parameter.
- """
- raw_code = get_throttling_function_code(js)
-
- transform_start = r"try{"
- plan_regex = re.compile(transform_start)
- match = plan_regex.search(raw_code)
-
- transform_plan_raw = js
-
- # Steps are either c[x](c[y]) or c[x](c[y],c[z])
- step_start = r"c\[(\d+)\]\(c\[(\d+)\](,c(\[(\d+)\]))?\)"
- step_regex = re.compile(step_start)
- matches = step_regex.findall(transform_plan_raw)
- transform_steps = []
- for match in matches:
- if match[4] != '':
- transform_steps.append((match[0],match[1],match[4]))
- else:
- transform_steps.append((match[0],match[1]))
-
- return transform_steps
-
-
-def reverse(arr: List, _: Optional[Any]):
- """Reverse elements in a list.
-
- This function is equivalent to:
-
- .. code-block:: javascript
-
- function(a, b) { a.reverse() }
-
- This method takes an unused ``b`` variable as their transform functions
- universally sent two arguments.
-
- **Example**:
-
- >>> reverse([1, 2, 3, 4])
- [4, 3, 2, 1]
- """
- return arr[::-1]
-
-
-def splice(arr: List, b: int):
- """Add/remove items to/from a list.
-
- This function is equivalent to:
-
- .. code-block:: javascript
-
- function(a, b) { a.splice(0, b) }
-
- **Example**:
-
- >>> splice([1, 2, 3, 4], 2)
- [1, 2]
- """
- return arr[b:]
-
-
-def swap(arr: List, b: int):
- """Swap positions at b modulus the list length.
-
- This function is equivalent to:
-
- .. code-block:: javascript
-
- function(a, b) { var c=a[0];a[0]=a[b%a.length];a[b]=c }
-
- **Example**:
-
- >>> swap([1, 2, 3, 4], 2)
- [3, 2, 1, 4]
- """
- r = b % len(arr)
- return list(chain([arr[r]], arr[1:r], [arr[0]], arr[r + 1 :]))
-
-
-def throttling_reverse(arr: list):
- """Reverses the input list.
-
- Needs to do an in-place reversal so that the passed list gets changed.
- To accomplish this, we create a reversed copy, and then change each
- indvidual element.
- """
- reverse_copy = arr.copy()[::-1]
- for i in range(len(reverse_copy)):
- arr[i] = reverse_copy[i]
-
-
-def throttling_push(d: list, e: Any):
- """Pushes an element onto a list."""
- d.append(e)
-
-
-def throttling_mod_func(d: list, e: int):
- """Perform the modular function from the throttling array functions.
-
- In the javascript, the modular operation is as follows:
- e = (e % d.length + d.length) % d.length
-
- We simply translate this to python here.
- """
- return (e % len(d) + len(d)) % len(d)
-
-
-def throttling_unshift(d: list, e: int):
- """Rotates the elements of the list to the right.
-
- In the javascript, the operation is as follows:
- for(e=(e%d.length+d.length)%d.length;e--;)d.unshift(d.pop())
- """
- e = throttling_mod_func(d, e)
- new_arr = d[-e:] + d[:-e]
- d.clear()
- for el in new_arr:
- d.append(el)
-
-
-def throttling_cipher_function(d: list, e: str):
- """This ciphers d with e to generate a new list.
-
- In the javascript, the operation is as follows:
- var h = [A-Za-z0-9-_], f = 96; // simplified from switch-case loop
- d.forEach(
- function(l,m,n){
- this.push(
- n[m]=h[
- (h.indexOf(l)-h.indexOf(this[m])+m-32+f--)%h.length
- ]
- )
- },
- e.split("")
- )
- """
- h = list('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_')
- f = 96
- # by naming it "this" we can more closely reflect the js
- this = list(e)
-
- # This is so we don't run into weirdness with enumerate while
- # we change the input list
- copied_list = d.copy()
-
- for m, l in enumerate(copied_list):
- bracket_val = (h.index(l) - h.index(this[m]) + m - 32 + f) % len(h)
- this.append(
- h[bracket_val]
- )
- d[m] = h[bracket_val]
- f -= 1
-
-
-def throttling_nested_splice(d: list, e: int):
- """Nested splice function in throttling js.
-
- In the javascript, the operation is as follows:
- function(d,e){
- e=(e%d.length+d.length)%d.length;
- d.splice(
- 0,
- 1,
- d.splice(
- e,
- 1,
- d[0]
- )[0]
- )
- }
-
- While testing, all this seemed to do is swap element 0 and e,
- but the actual process is preserved in case there was an edge
- case that was not considered.
- """
- e = throttling_mod_func(d, e)
- inner_splice = js_splice(
- d,
- e,
- 1,
- d[0]
- )
- js_splice(
- d,
- 0,
- 1,
- inner_splice[0]
- )
-
-
-def throttling_prepend(d: list, e: int):
- """
-
- In the javascript, the operation is as follows:
- function(d,e){
- e=(e%d.length+d.length)%d.length;
- d.splice(-e).reverse().forEach(
- function(f){
- d.unshift(f)
- }
- )
- }
-
- Effectively, this moves the last e elements of d to the beginning.
- """
- start_len = len(d)
- # First, calculate e
- e = throttling_mod_func(d, e)
-
- # Then do the prepending
- new_arr = d[-e:] + d[:-e]
-
- # And update the input list
- d.clear()
- for el in new_arr:
- d.append(el)
-
- end_len = len(d)
- assert start_len == end_len
-
-
-def throttling_swap(d: list, e: int):
- """Swap positions of the 0'th and e'th elements in-place."""
- e = throttling_mod_func(d, e)
- f = d[0]
- d[0] = d[e]
- d[e] = f
-
-
-def js_splice(arr: list, start: int, delete_count=None, *items):
- """Implementation of javascript's splice function.
-
- :param list arr:
- Array to splice
- :param int start:
- Index at which to start changing the array
- :param int delete_count:
- Number of elements to delete from the array
- :param *items:
- Items to add to the array
-
- Reference: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/splice # noqa:E501
- """
- # Special conditions for start value
- try:
- if start > len(arr):
- start = len(arr)
- # If start is negative, count backwards from end
- if start < 0:
- start = len(arr) - start
- except TypeError:
- # Non-integer start values are treated as 0 in js
- start = 0
-
- # Special condition when delete_count is greater than remaining elements
- if not delete_count or delete_count >= len(arr) - start:
- delete_count = len(arr) - start # noqa: N806
-
- deleted_elements = arr[start:start + delete_count]
-
- # Splice appropriately.
- new_arr = arr[:start] + list(items) + arr[start + delete_count:]
-
- # Replace contents of input array
- arr.clear()
- for el in new_arr:
- arr.append(el)
-
- return deleted_elements
-
-
-def map_functions(js_func: str) -> Callable:
- """For a given JavaScript transform function, return the Python equivalent.
-
- :param str js_func:
- The JavaScript version of the transform function.
- """
- mapper = (
- # function(a){a.reverse()}
- (r"{\w\.reverse\(\)}", reverse),
- # function(a,b){a.splice(0,b)}
- (r"{\w\.splice\(0,\w\)}", splice),
- # function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c}
- (r"{var\s\w=\w\[0\];\w\[0\]=\w\[\w\%\w.length\];\w\[\w\]=\w}", swap),
- # function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c}
- (
- r"{var\s\w=\w\[0\];\w\[0\]=\w\[\w\%\w.length\];\w\[\w\%\w.length\]=\w}",
- swap,
- ),
- )
-
- for pattern, fn in mapper:
- if re.search(pattern, js_func):
- return fn
- raise RegexMatchError(caller="map_functions", pattern="multiple")
diff --git a/lib/pytube/pytube/cli.py b/lib/pytube/pytube/cli.py
deleted file mode 100644
index c403497..0000000
--- a/lib/pytube/pytube/cli.py
+++ /dev/null
@@ -1,560 +0,0 @@
-#!/usr/bin/env python3
-"""A simple command line application to download youtube videos."""
-import argparse
-import gzip
-import json
-import logging
-import os
-import shutil
-import sys
-import datetime as dt
-import subprocess # nosec
-from typing import List, Optional
-
-import pytube.exceptions as exceptions
-from pytube import __version__
-from pytube import CaptionQuery, Playlist, Stream, YouTube
-from pytube.helpers import safe_filename, setup_logger
-
-
-logger = logging.getLogger(__name__)
-
-
-def main():
- """Command line application to download youtube videos."""
- # noinspection PyTypeChecker
- parser = argparse.ArgumentParser(description=main.__doc__)
- args = _parse_args(parser)
- if args.verbose:
- log_filename = None
- if args.logfile:
- log_filename = args.logfile
- setup_logger(logging.DEBUG, log_filename=log_filename)
- logger.debug(f'Pytube version: {__version__}')
-
- if not args.url or "youtu" not in args.url:
- parser.print_help()
- sys.exit(1)
-
- if "/playlist" in args.url:
- print("Loading playlist...")
- playlist = Playlist(args.url)
- if not args.target:
- args.target = safe_filename(playlist.title)
- for youtube_video in playlist.videos:
- try:
- _perform_args_on_youtube(youtube_video, args)
- except exceptions.PytubeError as e:
- print(f"There was an error with video: {youtube_video}")
- print(e)
- else:
- print("Loading video...")
- youtube = YouTube(args.url)
- _perform_args_on_youtube(youtube, args)
-
-
-def _perform_args_on_youtube(
- youtube: YouTube, args: argparse.Namespace
-) -> None:
- if len(sys.argv) == 2 : # no arguments parsed
- download_highest_resolution_progressive(
- youtube=youtube, resolution="highest", target=args.target
- )
- if args.list_captions:
- _print_available_captions(youtube.captions)
- if args.list:
- display_streams(youtube)
- if args.build_playback_report:
- build_playback_report(youtube)
- if args.itag:
- download_by_itag(youtube=youtube, itag=args.itag, target=args.target)
- if args.caption_code:
- download_caption(
- youtube=youtube, lang_code=args.caption_code, target=args.target
- )
- if args.resolution:
- download_by_resolution(
- youtube=youtube, resolution=args.resolution, target=args.target
- )
- if args.audio:
- download_audio(
- youtube=youtube, filetype=args.audio, target=args.target
- )
- if args.ffmpeg:
- ffmpeg_process(
- youtube=youtube, resolution=args.ffmpeg, target=args.target
- )
-
-
-def _parse_args(
- parser: argparse.ArgumentParser, args: Optional[List] = None
-) -> argparse.Namespace:
- parser.add_argument(
- "url", help="The YouTube /watch or /playlist url", nargs="?"
- )
- parser.add_argument(
- "--version", action="version", version="%(prog)s " + __version__,
- )
- parser.add_argument(
- "--itag", type=int, help="The itag for the desired stream",
- )
- parser.add_argument(
- "-r",
- "--resolution",
- type=str,
- help="The resolution for the desired stream",
- )
- parser.add_argument(
- "-l",
- "--list",
- action="store_true",
- help=(
- "The list option causes pytube cli to return a list of streams "
- "available to download"
- ),
- )
- parser.add_argument(
- "-v",
- "--verbose",
- action="store_true",
- dest="verbose",
- help="Set logger output to verbose output.",
- )
- parser.add_argument(
- "--logfile",
- action="store",
- help="logging debug and error messages into a log file",
- )
- parser.add_argument(
- "--build-playback-report",
- action="store_true",
- help="Save the html and js to disk",
- )
- parser.add_argument(
- "-c",
- "--caption-code",
- type=str,
- help=(
- "Download srt captions for given language code. "
- "Prints available language codes if no argument given"
- ),
- )
- parser.add_argument(
- '-lc',
- '--list-captions',
- action='store_true',
- help=(
- "List available caption codes for a video"
- )
- )
- parser.add_argument(
- "-t",
- "--target",
- help=(
- "The output directory for the downloaded stream. "
- "Default is current working directory"
- ),
- )
- parser.add_argument(
- "-a",
- "--audio",
- const="mp4",
- nargs="?",
- help=(
- "Download the audio for a given URL at the highest bitrate available. "
- "Defaults to mp4 format if none is specified"
- ),
- )
- parser.add_argument(
- "-f",
- "--ffmpeg",
- const="best",
- nargs="?",
- help=(
- "Downloads the audio and video stream for resolution provided. "
- "If no resolution is provided, downloads the best resolution. "
- "Runs the command line program ffmpeg to combine the audio and video"
- ),
- )
-
- return parser.parse_args(args)
-
-
-def build_playback_report(youtube: YouTube) -> None:
- """Serialize the request data to json for offline debugging.
-
- :param YouTube youtube:
- A YouTube object.
- """
- ts = int(dt.datetime.utcnow().timestamp())
- fp = os.path.join(os.getcwd(), f"yt-video-{youtube.video_id}-{ts}.json.gz")
-
- js = youtube.js
- watch_html = youtube.watch_html
- vid_info = youtube.vid_info
-
- with gzip.open(fp, "wb") as fh:
- fh.write(
- json.dumps(
- {
- "url": youtube.watch_url,
- "js": js,
- "watch_html": watch_html,
- "video_info": vid_info,
- }
- ).encode("utf8"),
- )
-
-
-def display_progress_bar(
- bytes_received: int, filesize: int, ch: str = "█", scale: float = 0.55
-) -> None:
- """Display a simple, pretty progress bar.
-
- Example:
- ~~~~~~~~
- PSY - GANGNAM STYLE(강남스타일) MV.mp4
- ↳ |███████████████████████████████████████| 100.0%
-
- :param int bytes_received:
- The delta between the total file size (bytes) and bytes already
- written to disk.
- :param int filesize:
- File size of the media stream in bytes.
- :param str ch:
- Character to use for presenting progress segment.
- :param float scale:
- Scale multiplier to reduce progress bar size.
-
- """
- columns = shutil.get_terminal_size().columns
- max_width = int(columns * scale)
-
- filled = int(round(max_width * bytes_received / float(filesize)))
- remaining = max_width - filled
- progress_bar = ch * filled + " " * remaining
- percent = round(100.0 * bytes_received / float(filesize), 1)
- text = f" ↳ |{progress_bar}| {percent}%\r"
- sys.stdout.write(text)
- sys.stdout.flush()
-
-
-# noinspection PyUnusedLocal
-def on_progress(
- stream: Stream, chunk: bytes, bytes_remaining: int
-) -> None: # pylint: disable=W0613
- filesize = stream.filesize
- bytes_received = filesize - bytes_remaining
- display_progress_bar(bytes_received, filesize)
-
-
-def _download(
- stream: Stream,
- target: Optional[str] = None,
- filename: Optional[str] = None,
-) -> None:
- filesize_megabytes = stream.filesize // 1048576
- print(f"{filename or stream.default_filename} | {filesize_megabytes} MB")
- file_path = stream.get_file_path(filename=filename, output_path=target)
- if stream.exists_at_path(file_path):
- print(f"Already downloaded at:\n{file_path}")
- return
-
- stream.download(output_path=target, filename=filename)
- sys.stdout.write("\n")
-
-
-def _unique_name(base: str, subtype: str, media_type: str, target: str) -> str:
- """
- Given a base name, the file format, and the target directory, will generate
- a filename unique for that directory and file format.
- :param str base:
- The given base-name.
- :param str subtype:
- The filetype of the video which will be downloaded.
- :param str media_type:
- The media_type of the file, ie. "audio" or "video"
- :param Path target:
- Target directory for download.
- """
- counter = 0
- while True:
- file_name = f"{base}_{media_type}_{counter}"
- file_path = os.path.join(target, f"{file_name}.{subtype}")
- if not os.path.exists(file_path):
- return file_name
- counter += 1
-
-
-def ffmpeg_process(
- youtube: YouTube, resolution: str, target: Optional[str] = None
-) -> None:
- """
- Decides the correct video stream to download, then calls _ffmpeg_downloader.
-
- :param YouTube youtube:
- A valid YouTube object.
- :param str resolution:
- YouTube video resolution.
- :param str target:
- Target directory for download
- """
- youtube.register_on_progress_callback(on_progress)
- target = target or os.getcwd()
-
- if resolution == "best":
- highest_quality_stream = (
- youtube.streams.filter(progressive=False)
- .order_by("resolution")
- .last()
- )
- mp4_stream = (
- youtube.streams.filter(progressive=False, subtype="mp4")
- .order_by("resolution")
- .last()
- )
- if highest_quality_stream.resolution == mp4_stream.resolution:
- video_stream = mp4_stream
- else:
- video_stream = highest_quality_stream
- else:
- video_stream = youtube.streams.filter(
- progressive=False, resolution=resolution, subtype="mp4"
- ).first()
- if not video_stream:
- video_stream = youtube.streams.filter(
- progressive=False, resolution=resolution
- ).first()
- if video_stream is None:
- print(f"Could not find a stream with resolution: {resolution}")
- print("Try one of these:")
- display_streams(youtube)
- sys.exit()
-
- audio_stream = youtube.streams.get_audio_only(video_stream.subtype)
- if not audio_stream:
- audio_stream = (
- youtube.streams.filter(only_audio=True).order_by("abr").last()
- )
- if not audio_stream:
- print("Could not find an audio only stream")
- sys.exit()
- _ffmpeg_downloader(
- audio_stream=audio_stream, video_stream=video_stream, target=target
- )
-
-
-def _ffmpeg_downloader(
- audio_stream: Stream, video_stream: Stream, target: str
-) -> None:
- """
- Given a YouTube Stream object, finds the correct audio stream, downloads them both
- giving them a unique name, them uses ffmpeg to create a new file with the audio
- and video from the previously downloaded files. Then deletes the original adaptive
- streams, leaving the combination.
-
- :param Stream audio_stream:
- A valid Stream object representing the audio to download
- :param Stream video_stream:
- A valid Stream object representing the video to download
- :param Path target:
- A valid Path object
- """
- video_unique_name = _unique_name(
- safe_filename(video_stream.title),
- video_stream.subtype,
- "video",
- target=target,
- )
- audio_unique_name = _unique_name(
- safe_filename(video_stream.title),
- audio_stream.subtype,
- "audio",
- target=target,
- )
- _download(stream=video_stream, target=target, filename=video_unique_name)
- print("Loading audio...")
- _download(stream=audio_stream, target=target, filename=audio_unique_name)
-
- video_path = os.path.join(
- target, f"{video_unique_name}.{video_stream.subtype}"
- )
- audio_path = os.path.join(
- target, f"{audio_unique_name}.{audio_stream.subtype}"
- )
- final_path = os.path.join(
- target, f"{safe_filename(video_stream.title)}.{video_stream.subtype}"
- )
-
- subprocess.run( # nosec
- [
- "ffmpeg",
- "-i",
- video_path,
- "-i",
- audio_path,
- "-codec",
- "copy",
- final_path,
- ]
- )
- os.unlink(video_path)
- os.unlink(audio_path)
-
-
-def download_by_itag(
- youtube: YouTube, itag: int, target: Optional[str] = None
-) -> None:
- """Start downloading a YouTube video.
-
- :param YouTube youtube:
- A valid YouTube object.
- :param int itag:
- YouTube format identifier code.
- :param str target:
- Target directory for download
- """
- stream = youtube.streams.get_by_itag(itag)
- if stream is None:
- print(f"Could not find a stream with itag: {itag}")
- print("Try one of these:")
- display_streams(youtube)
- sys.exit()
-
- youtube.register_on_progress_callback(on_progress)
-
- try:
- _download(stream, target=target)
- except KeyboardInterrupt:
- sys.exit()
-
-
-def download_by_resolution(
- youtube: YouTube, resolution: str, target: Optional[str] = None
-) -> None:
- """Start downloading a YouTube video.
-
- :param YouTube youtube:
- A valid YouTube object.
- :param str resolution:
- YouTube video resolution.
- :param str target:
- Target directory for download
- """
- # TODO(nficano): allow dash itags to be selected
- stream = youtube.streams.get_by_resolution(resolution)
- if stream is None:
- print(f"Could not find a stream with resolution: {resolution}")
- print("Try one of these:")
- display_streams(youtube)
- sys.exit()
-
- youtube.register_on_progress_callback(on_progress)
-
- try:
- _download(stream, target=target)
- except KeyboardInterrupt:
- sys.exit()
-
-
-def download_highest_resolution_progressive(
- youtube: YouTube, resolution: str, target: Optional[str] = None
-) -> None:
- """Start downloading the highest resolution progressive stream.
-
- :param YouTube youtube:
- A valid YouTube object.
- :param str resolution:
- YouTube video resolution.
- :param str target:
- Target directory for download
- """
- youtube.register_on_progress_callback(on_progress)
- try:
- stream = youtube.streams.get_highest_resolution()
- except exceptions.VideoUnavailable as err:
- print(f"No video streams available: {err}")
- else:
- try:
- _download(stream, target=target)
- except KeyboardInterrupt:
- sys.exit()
-
-
-def display_streams(youtube: YouTube) -> None:
- """Probe YouTube video and lists its available formats.
-
- :param YouTube youtube:
- A valid YouTube watch URL.
-
- """
- for stream in youtube.streams:
- print(stream)
-
-
-def _print_available_captions(captions: CaptionQuery) -> None:
- print(
- f"Available caption codes are: {', '.join(c.code for c in captions)}"
- )
-
-
-def download_caption(
- youtube: YouTube, lang_code: Optional[str], target: Optional[str] = None
-) -> None:
- """Download a caption for the YouTube video.
-
- :param YouTube youtube:
- A valid YouTube object.
- :param str lang_code:
- Language code desired for caption file.
- Prints available codes if the value is None
- or the desired code is not available.
- :param str target:
- Target directory for download
- """
- try:
- caption = youtube.captions[lang_code]
- downloaded_path = caption.download(
- title=youtube.title, output_path=target
- )
- print(f"Saved caption file to: {downloaded_path}")
- except KeyError:
- print(f"Unable to find caption with code: {lang_code}")
- _print_available_captions(youtube.captions)
-
-
-def download_audio(
- youtube: YouTube, filetype: str, target: Optional[str] = None
-) -> None:
- """
- Given a filetype, downloads the highest quality available audio stream for a
- YouTube video.
-
- :param YouTube youtube:
- A valid YouTube object.
- :param str filetype:
- Desired file format to download.
- :param str target:
- Target directory for download
- """
- audio = (
- youtube.streams.filter(only_audio=True, subtype=filetype)
- .order_by("abr")
- .last()
- )
-
- if audio is None:
- print("No audio only stream found. Try one of these:")
- display_streams(youtube)
- sys.exit()
-
- youtube.register_on_progress_callback(on_progress)
-
- try:
- _download(audio, target=target)
- except KeyboardInterrupt:
- sys.exit()
-
-
-if __name__ == "__main__":
- main()
diff --git a/lib/pytube/pytube/contrib/__init__.py b/lib/pytube/pytube/contrib/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/lib/pytube/pytube/contrib/channel.py b/lib/pytube/pytube/contrib/channel.py
deleted file mode 100644
index 147ff7e..0000000
--- a/lib/pytube/pytube/contrib/channel.py
+++ /dev/null
@@ -1,201 +0,0 @@
-# -*- coding: utf-8 -*-
-"""Module for interacting with a user's youtube channel."""
-import json
-import logging
-from typing import Dict, List, Optional, Tuple
-
-from pytube import extract, Playlist, request
-from pytube.helpers import uniqueify
-
-logger = logging.getLogger(__name__)
-
-
-class Channel(Playlist):
- def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
- """Construct a :class:`Channel `.
-
- :param str url:
- A valid YouTube channel URL.
- :param proxies:
- (Optional) A dictionary of proxies to use for web requests.
- """
- super().__init__(url, proxies)
-
- self.channel_uri = extract.channel_name(url)
-
- self.channel_url = (
- f"https://www.youtube.com{self.channel_uri}"
- )
-
- self.videos_url = self.channel_url + '/videos'
- self.playlists_url = self.channel_url + '/playlists'
- self.community_url = self.channel_url + '/community'
- self.featured_channels_url = self.channel_url + '/channels'
- self.about_url = self.channel_url + '/about'
-
- # Possible future additions
- self._playlists_html = None
- self._community_html = None
- self._featured_channels_html = None
- self._about_html = None
-
- @property
- def channel_name(self):
- """Get the name of the YouTube channel.
-
- :rtype: str
- """
- return self.initial_data['metadata']['channelMetadataRenderer']['title']
-
- @property
- def channel_id(self):
- """Get the ID of the YouTube channel.
-
- This will return the underlying ID, not the vanity URL.
-
- :rtype: str
- """
- return self.initial_data['metadata']['channelMetadataRenderer']['externalId']
-
- @property
- def vanity_url(self):
- """Get the vanity URL of the YouTube channel.
-
- Returns None if it doesn't exist.
-
- :rtype: str
- """
- return self.initial_data['metadata']['channelMetadataRenderer'].get('vanityChannelUrl', None) # noqa:E501
-
- @property
- def html(self):
- """Get the html for the /videos page.
-
- :rtype: str
- """
- if self._html:
- return self._html
- self._html = request.get(self.videos_url)
- return self._html
-
- @property
- def playlists_html(self):
- """Get the html for the /playlists page.
-
- Currently unused for any functionality.
-
- :rtype: str
- """
- if self._playlists_html:
- return self._playlists_html
- else:
- self._playlists_html = request.get(self.playlists_url)
- return self._playlists_html
-
- @property
- def community_html(self):
- """Get the html for the /community page.
-
- Currently unused for any functionality.
-
- :rtype: str
- """
- if self._community_html:
- return self._community_html
- else:
- self._community_html = request.get(self.community_url)
- return self._community_html
-
- @property
- def featured_channels_html(self):
- """Get the html for the /channels page.
-
- Currently unused for any functionality.
-
- :rtype: str
- """
- if self._featured_channels_html:
- return self._featured_channels_html
- else:
- self._featured_channels_html = request.get(self.featured_channels_url)
- return self._featured_channels_html
-
- @property
- def about_html(self):
- """Get the html for the /about page.
-
- Currently unused for any functionality.
-
- :rtype: str
- """
- if self._about_html:
- return self._about_html
- else:
- self._about_html = request.get(self.about_url)
- return self._about_html
-
- @staticmethod
- def _extract_videos(raw_json: str) -> Tuple[List[str], Optional[str]]:
- """Extracts videos from a raw json page
-
- :param str raw_json: Input json extracted from the page or the last
- server response
- :rtype: Tuple[List[str], Optional[str]]
- :returns: Tuple containing a list of up to 100 video watch ids and
- a continuation token, if more videos are available
- """
- initial_data = json.loads(raw_json)
- # this is the json tree structure, if the json was extracted from
- # html
- try:
- videos = initial_data["contents"][
- "twoColumnBrowseResultsRenderer"][
- "tabs"][1]["tabRenderer"]["content"][
- "sectionListRenderer"]["contents"][0][
- "itemSectionRenderer"]["contents"][0][
- "gridRenderer"]["items"]
- except (KeyError, IndexError, TypeError):
- try:
- # this is the json tree structure, if the json was directly sent
- # by the server in a continuation response
- important_content = initial_data[1]['response']['onResponseReceivedActions'][
- 0
- ]['appendContinuationItemsAction']['continuationItems']
- videos = important_content
- except (KeyError, IndexError, TypeError):
- try:
- # this is the json tree structure, if the json was directly sent
- # by the server in a continuation response
- # no longer a list and no longer has the "response" key
- important_content = initial_data['onResponseReceivedActions'][0][
- 'appendContinuationItemsAction']['continuationItems']
- videos = important_content
- except (KeyError, IndexError, TypeError) as p:
- logger.info(p)
- return [], None
-
- try:
- continuation = videos[-1]['continuationItemRenderer'][
- 'continuationEndpoint'
- ]['continuationCommand']['token']
- videos = videos[:-1]
- except (KeyError, IndexError):
- # if there is an error, no continuation is available
- continuation = None
-
- # remove duplicates
- return (
- uniqueify(
- list(
- # only extract the video ids from the video data
- map(
- lambda x: (
- f"/watch?v="
- f"{x['gridVideoRenderer']['videoId']}"
- ),
- videos
- )
- ),
- ),
- continuation,
- )
diff --git a/lib/pytube/pytube/contrib/playlist.py b/lib/pytube/pytube/contrib/playlist.py
deleted file mode 100644
index c55f5e9..0000000
--- a/lib/pytube/pytube/contrib/playlist.py
+++ /dev/null
@@ -1,419 +0,0 @@
-"""Module to download a complete playlist from a youtube channel."""
-import json
-import logging
-from collections.abc import Sequence
-from datetime import date, datetime
-from typing import Dict, Iterable, List, Optional, Tuple, Union
-
-from pytube import extract, request, YouTube
-from pytube.helpers import cache, DeferredGeneratorList, install_proxy, uniqueify
-
-logger = logging.getLogger(__name__)
-
-
-class Playlist(Sequence):
- """Load a YouTube playlist with URL"""
-
- def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
- if proxies:
- install_proxy(proxies)
-
- self._input_url = url
-
- # These need to be initialized as None for the properties.
- self._html = None
- self._ytcfg = None
- self._initial_data = None
- self._sidebar_info = None
-
- self._playlist_id = None
-
- @property
- def playlist_id(self):
- """Get the playlist id.
-
- :rtype: str
- """
- if self._playlist_id:
- return self._playlist_id
- self._playlist_id = extract.playlist_id(self._input_url)
- return self._playlist_id
-
- @property
- def playlist_url(self):
- """Get the base playlist url.
-
- :rtype: str
- """
- return f"https://www.youtube.com/playlist?list={self.playlist_id}"
-
- @property
- def html(self):
- """Get the playlist page html.
-
- :rtype: str
- """
- if self._html:
- return self._html
- self._html = request.get(self.playlist_url)
- return self._html
-
- @property
- def ytcfg(self):
- """Extract the ytcfg from the playlist page html.
-
- :rtype: dict
- """
- if self._ytcfg:
- return self._ytcfg
- self._ytcfg = extract.get_ytcfg(self.html)
- return self._ytcfg
-
- @property
- def initial_data(self):
- """Extract the initial data from the playlist page html.
-
- :rtype: dict
- """
- if self._initial_data:
- return self._initial_data
- else:
- self._initial_data = extract.initial_data(self.html)
- return self._initial_data
-
- @property
- def sidebar_info(self):
- """Extract the sidebar info from the playlist page html.
-
- :rtype: dict
- """
- if self._sidebar_info:
- return self._sidebar_info
- else:
- self._sidebar_info = self.initial_data['sidebar'][
- 'playlistSidebarRenderer']['items']
- return self._sidebar_info
-
- @property
- def yt_api_key(self):
- """Extract the INNERTUBE_API_KEY from the playlist ytcfg.
-
- :rtype: str
- """
- return self.ytcfg['INNERTUBE_API_KEY']
-
- def _paginate(
- self, until_watch_id: Optional[str] = None
- ) -> Iterable[List[str]]:
- """Parse the video links from the page source, yields the /watch?v=
- part from video link
-
- :param until_watch_id Optional[str]: YouTube Video watch id until
- which the playlist should be read.
-
- :rtype: Iterable[List[str]]
- :returns: Iterable of lists of YouTube watch ids
- """
- videos_urls, continuation = self._extract_videos(
- json.dumps(extract.initial_data(self.html))
- )
- if until_watch_id:
- try:
- trim_index = videos_urls.index(f"/watch?v={until_watch_id}")
- yield videos_urls[:trim_index]
- return
- except ValueError:
- pass
- yield videos_urls
-
- # Extraction from a playlist only returns 100 videos at a time
- # if self._extract_videos returns a continuation there are more
- # than 100 songs inside a playlist, so we need to add further requests
- # to gather all of them
- if continuation:
- load_more_url, headers, data = self._build_continuation_url(continuation)
- else:
- load_more_url, headers, data = None, None, None
-
- while load_more_url and headers and data: # there is an url found
- logger.debug("load more url: %s", load_more_url)
- # requesting the next page of videos with the url generated from the
- # previous page, needs to be a post
- req = request.post(load_more_url, extra_headers=headers, data=data)
- # extract up to 100 songs from the page loaded
- # returns another continuation if more videos are available
- videos_urls, continuation = self._extract_videos(req)
- if until_watch_id:
- try:
- trim_index = videos_urls.index(f"/watch?v={until_watch_id}")
- yield videos_urls[:trim_index]
- return
- except ValueError:
- pass
- yield videos_urls
-
- if continuation:
- load_more_url, headers, data = self._build_continuation_url(
- continuation
- )
- else:
- load_more_url, headers, data = None, None, None
-
- def _build_continuation_url(self, continuation: str) -> Tuple[str, dict, dict]:
- """Helper method to build the url and headers required to request
- the next page of videos
-
- :param str continuation: Continuation extracted from the json response
- of the last page
- :rtype: Tuple[str, dict, dict]
- :returns: Tuple of an url and required headers for the next http
- request
- """
- return (
- (
- # was changed to this format (and post requests)
- # between 2021.03.02 and 2021.03.03
- "https://www.youtube.com/youtubei/v1/browse?key="
- f"{self.yt_api_key}"
- ),
- {
- "X-YouTube-Client-Name": "1",
- "X-YouTube-Client-Version": "2.20200720.00.02",
- },
- # extra data required for post request
- {
- "continuation": continuation,
- "context": {
- "client": {
- "clientName": "WEB",
- "clientVersion": "2.20200720.00.02"
- }
- }
- }
- )
-
- @staticmethod
- def _extract_videos(raw_json: str) -> Tuple[List[str], Optional[str]]:
- """Extracts videos from a raw json page
-
- :param str raw_json: Input json extracted from the page or the last
- server response
- :rtype: Tuple[List[str], Optional[str]]
- :returns: Tuple containing a list of up to 100 video watch ids and
- a continuation token, if more videos are available
- """
- initial_data = json.loads(raw_json)
- try:
- # this is the json tree structure, if the json was extracted from
- # html
- section_contents = initial_data["contents"][
- "twoColumnBrowseResultsRenderer"][
- "tabs"][0]["tabRenderer"]["content"][
- "sectionListRenderer"]["contents"]
- try:
- # Playlist without submenus
- important_content = section_contents[
- 0]["itemSectionRenderer"][
- "contents"][0]["playlistVideoListRenderer"]
- except (KeyError, IndexError, TypeError):
- # Playlist with submenus
- important_content = section_contents[
- 1]["itemSectionRenderer"][
- "contents"][0]["playlistVideoListRenderer"]
- videos = important_content["contents"]
- except (KeyError, IndexError, TypeError):
- try:
- # this is the json tree structure, if the json was directly sent
- # by the server in a continuation response
- # no longer a list and no longer has the "response" key
- important_content = initial_data['onResponseReceivedActions'][0][
- 'appendContinuationItemsAction']['continuationItems']
- videos = important_content
- except (KeyError, IndexError, TypeError) as p:
- logger.info(p)
- return [], None
-
- try:
- continuation = videos[-1]['continuationItemRenderer'][
- 'continuationEndpoint'
- ]['continuationCommand']['token']
- videos = videos[:-1]
- except (KeyError, IndexError):
- # if there is an error, no continuation is available
- continuation = None
-
- # remove duplicates
- return (
- uniqueify(
- list(
- # only extract the video ids from the video data
- map(
- lambda x: (
- f"/watch?v="
- f"{x['playlistVideoRenderer']['videoId']}"
- ),
- videos
- )
- ),
- ),
- continuation,
- )
-
- def trimmed(self, video_id: str) -> Iterable[str]:
- """Retrieve a list of YouTube video URLs trimmed at the given video ID
-
- i.e. if the playlist has video IDs 1,2,3,4 calling trimmed(3) returns
- [1,2]
- :type video_id: str
- video ID to trim the returned list of playlist URLs at
- :rtype: List[str]
- :returns:
- List of video URLs from the playlist trimmed at the given ID
- """
- for page in self._paginate(until_watch_id=video_id):
- yield from (self._video_url(watch_path) for watch_path in page)
-
- def url_generator(self):
- """Generator that yields video URLs.
-
- :Yields: Video URLs
- """
- for page in self._paginate():
- for video in page:
- yield self._video_url(video)
-
- @property # type: ignore
- @cache
- def video_urls(self) -> DeferredGeneratorList:
- """Complete links of all the videos in playlist
-
- :rtype: List[str]
- :returns: List of video URLs
- """
- return DeferredGeneratorList(self.url_generator())
-
- def videos_generator(self):
- for url in self.video_urls:
- yield YouTube(url)
-
- @property
- def videos(self) -> Iterable[YouTube]:
- """Yields YouTube objects of videos in this playlist
-
- :rtype: List[YouTube]
- :returns: List of YouTube
- """
- return DeferredGeneratorList(self.videos_generator())
-
- def __getitem__(self, i: Union[slice, int]) -> Union[str, List[str]]:
- return self.video_urls[i]
-
- def __len__(self) -> int:
- return len(self.video_urls)
-
- def __repr__(self) -> str:
- return f"{repr(self.video_urls)}"
-
- @property
- @cache
- def last_updated(self) -> Optional[date]:
- """Extract the date that the playlist was last updated.
-
- For some playlists, this will be a specific date, which is returned as a datetime
- object. For other playlists, this is an estimate such as "1 week ago". Due to the
- fact that this value is returned as a string, pytube does a best-effort parsing
- where possible, and returns the raw string where it is not possible.
-
- :return: Date of last playlist update where possible, else the string provided
- :rtype: datetime.date
- """
- last_updated_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
- 'stats'][2]['runs'][1]['text']
- try:
- date_components = last_updated_text.split()
- month = date_components[0]
- day = date_components[1].strip(',')
- year = date_components[2]
- return datetime.strptime(
- f"{month} {day:0>2} {year}", "%b %d %Y"
- ).date()
- except (IndexError, KeyError):
- return last_updated_text
-
- @property
- @cache
- def title(self) -> Optional[str]:
- """Extract playlist title
-
- :return: playlist title (name)
- :rtype: Optional[str]
- """
- return self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
- 'title']['runs'][0]['text']
-
- @property
- def description(self) -> str:
- return self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
- 'description']['simpleText']
-
- @property
- def length(self):
- """Extract the number of videos in the playlist.
-
- :return: Playlist video count
- :rtype: int
- """
- count_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
- 'stats'][0]['runs'][0]['text']
- count_text = count_text.replace(',','')
- return int(count_text)
-
- @property
- def views(self):
- """Extract view count for playlist.
-
- :return: Playlist view count
- :rtype: int
- """
- # "1,234,567 views"
- views_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
- 'stats'][1]['simpleText']
- # "1,234,567"
- count_text = views_text.split()[0]
- # "1234567"
- count_text = count_text.replace(',', '')
- return int(count_text)
-
- @property
- def owner(self):
- """Extract the owner of the playlist.
-
- :return: Playlist owner name.
- :rtype: str
- """
- return self.sidebar_info[1]['playlistSidebarSecondaryInfoRenderer'][
- 'videoOwner']['videoOwnerRenderer']['title']['runs'][0]['text']
-
- @property
- def owner_id(self):
- """Extract the channel_id of the owner of the playlist.
-
- :return: Playlist owner's channel ID.
- :rtype: str
- """
- return self.sidebar_info[1]['playlistSidebarSecondaryInfoRenderer'][
- 'videoOwner']['videoOwnerRenderer']['title']['runs'][0][
- 'navigationEndpoint']['browseEndpoint']['browseId']
-
- @property
- def owner_url(self):
- """Create the channel url of the owner of the playlist.
-
- :return: Playlist owner's channel url.
- :rtype: str
- """
- return f'https://www.youtube.com/channel/{self.owner_id}'
-
- @staticmethod
- def _video_url(watch_path: str):
- return f"https://www.youtube.com{watch_path}"
diff --git a/lib/pytube/pytube/contrib/search.py b/lib/pytube/pytube/contrib/search.py
deleted file mode 100644
index 87d60c5..0000000
--- a/lib/pytube/pytube/contrib/search.py
+++ /dev/null
@@ -1,225 +0,0 @@
-"""Module for interacting with YouTube search."""
-# Native python imports
-import logging
-
-# Local imports
-from pytube import YouTube
-from pytube.innertube import InnerTube
-
-
-logger = logging.getLogger(__name__)
-
-
-class Search:
- def __init__(self, query):
- """Initialize Search object.
-
- :param str query:
- Search query provided by the user.
- """
- self.query = query
- self._innertube_client = InnerTube(client='WEB')
-
- # The first search, without a continuation, is structured differently
- # and contains completion suggestions, so we must store this separately
- self._initial_results = None
-
- self._results = None
- self._completion_suggestions = None
-
- # Used for keeping track of query continuations so that new results
- # are always returned when get_next_results() is called
- self._current_continuation = None
-
- @property
- def completion_suggestions(self):
- """Return query autocompletion suggestions for the query.
-
- :rtype: list
- :returns:
- A list of autocomplete suggestions provided by YouTube for the query.
- """
- if self._completion_suggestions:
- return self._completion_suggestions
- if self.results:
- self._completion_suggestions = self._initial_results['refinements']
- return self._completion_suggestions
-
- @property
- def results(self):
- """Return search results.
-
- On first call, will generate and return the first set of results.
- Additional results can be generated using ``.get_next_results()``.
-
- :rtype: list
- :returns:
- A list of YouTube objects.
- """
- if self._results:
- return self._results
-
- videos, continuation = self.fetch_and_parse()
- self._results = videos
- self._current_continuation = continuation
- return self._results
-
- def get_next_results(self):
- """Use the stored continuation string to fetch the next set of results.
-
- This method does not return the results, but instead updates the results property.
- """
- if self._current_continuation:
- videos, continuation = self.fetch_and_parse(self._current_continuation)
- self._results.extend(videos)
- self._current_continuation = continuation
- else:
- raise IndexError
-
- def fetch_and_parse(self, continuation=None):
- """Fetch from the innertube API and parse the results.
-
- :param str continuation:
- Continuation string for fetching results.
- :rtype: tuple
- :returns:
- A tuple of a list of YouTube objects and a continuation string.
- """
- # Begin by executing the query and identifying the relevant sections
- # of the results
- raw_results = self.fetch_query(continuation)
-
- # Initial result is handled by try block, continuations by except block
- try:
- sections = raw_results['contents']['twoColumnSearchResultsRenderer'][
- 'primaryContents']['sectionListRenderer']['contents']
- except KeyError:
- sections = raw_results['onResponseReceivedCommands'][0][
- 'appendContinuationItemsAction']['continuationItems']
- item_renderer = None
- continuation_renderer = None
- for s in sections:
- if 'itemSectionRenderer' in s:
- item_renderer = s['itemSectionRenderer']
- if 'continuationItemRenderer' in s:
- continuation_renderer = s['continuationItemRenderer']
-
- # If the continuationItemRenderer doesn't exist, assume no further results
- if continuation_renderer:
- next_continuation = continuation_renderer['continuationEndpoint'][
- 'continuationCommand']['token']
- else:
- next_continuation = None
-
- # If the itemSectionRenderer doesn't exist, assume no results.
- if item_renderer:
- videos = []
- raw_video_list = item_renderer['contents']
- for video_details in raw_video_list:
- # Skip over ads
- if video_details.get('searchPyvRenderer', {}).get('ads', None):
- continue
-
- # Skip "recommended" type videos e.g. "people also watched" and "popular X"
- # that break up the search results
- if 'shelfRenderer' in video_details:
- continue
-
- # Skip auto-generated "mix" playlist results
- if 'radioRenderer' in video_details:
- continue
-
- # Skip playlist results
- if 'playlistRenderer' in video_details:
- continue
-
- # Skip channel results
- if 'channelRenderer' in video_details:
- continue
-
- # Skip 'people also searched for' results
- if 'horizontalCardListRenderer' in video_details:
- continue
-
- # Can't seem to reproduce, probably related to typo fix suggestions
- if 'didYouMeanRenderer' in video_details:
- continue
-
- # Seems to be the renderer used for the image shown on a no results page
- if 'backgroundPromoRenderer' in video_details:
- continue
-
- if 'videoRenderer' not in video_details:
- logger.warning('Unexpected renderer encountered.')
- logger.warning(f'Renderer name: {video_details.keys()}')
- logger.warning(f'Search term: {self.query}')
- logger.warning(
- 'Please open an issue at '
- 'https://github.com/pytube/pytube/issues '
- 'and provide this log output.'
- )
- continue
-
- # Extract relevant video information from the details.
- # Some of this can be used to pre-populate attributes of the
- # YouTube object.
- vid_renderer = video_details['videoRenderer']
- vid_id = vid_renderer['videoId']
- vid_url = f'https://www.youtube.com/watch?v={vid_id}'
- vid_title = vid_renderer['title']['runs'][0]['text']
- vid_channel_name = vid_renderer['ownerText']['runs'][0]['text']
- vid_channel_uri = vid_renderer['ownerText']['runs'][0][
- 'navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
- # Livestreams have "runs", non-livestreams have "simpleText",
- # and scheduled releases do not have 'viewCountText'
- if 'viewCountText' in vid_renderer:
- if 'runs' in vid_renderer['viewCountText']:
- vid_view_count_text = vid_renderer['viewCountText']['runs'][0]['text']
- else:
- vid_view_count_text = vid_renderer['viewCountText']['simpleText']
- # Strip ' views' text, then remove commas
- stripped_text = vid_view_count_text.split()[0].replace(',','')
- if stripped_text == 'No':
- vid_view_count = 0
- else:
- vid_view_count = int(stripped_text)
- else:
- vid_view_count = 0
- if 'lengthText' in vid_renderer:
- vid_length = vid_renderer['lengthText']['simpleText']
- else:
- vid_length = None
-
- vid_metadata = {
- 'id': vid_id,
- 'url': vid_url,
- 'title': vid_title,
- 'channel_name': vid_channel_name,
- 'channel_url': vid_channel_uri,
- 'view_count': vid_view_count,
- 'length': vid_length
- }
-
- # Construct YouTube object from metadata and append to results
- vid = YouTube(vid_metadata['url'])
- vid.author = vid_metadata['channel_name']
- vid.title = vid_metadata['title']
- videos.append(vid)
- else:
- videos = None
-
- return videos, next_continuation
-
- def fetch_query(self, continuation=None):
- """Fetch raw results from the innertube API.
-
- :param str continuation:
- Continuation string for fetching results.
- :rtype: dict
- :returns:
- The raw json object returned by the innertube API.
- """
- query_results = self._innertube_client.search(self.query, continuation)
- if not self._initial_results:
- self._initial_results = query_results
- return query_results # noqa:R504
diff --git a/lib/pytube/pytube/exceptions.py b/lib/pytube/pytube/exceptions.py
deleted file mode 100644
index ec44d2a..0000000
--- a/lib/pytube/pytube/exceptions.py
+++ /dev/null
@@ -1,145 +0,0 @@
-"""Library specific exception definitions."""
-from typing import Pattern, Union
-
-
-class PytubeError(Exception):
- """Base pytube exception that all others inherit.
-
- This is done to not pollute the built-in exceptions, which *could* result
- in unintended errors being unexpectedly and incorrectly handled within
- implementers code.
- """
-
-
-class MaxRetriesExceeded(PytubeError):
- """Maximum number of retries exceeded."""
-
-
-class HTMLParseError(PytubeError):
- """HTML could not be parsed"""
-
-
-class ExtractError(PytubeError):
- """Data extraction based exception."""
-
-
-class RegexMatchError(ExtractError):
- """Regex pattern did not return any matches."""
-
- def __init__(self, caller: str, pattern: Union[str, Pattern]):
- """
- :param str caller:
- Calling function
- :param str pattern:
- Pattern that failed to match
- """
- super().__init__(f"{caller}: could not find match for {pattern}")
- self.caller = caller
- self.pattern = pattern
-
-
-class VideoUnavailable(PytubeError):
- """Base video unavailable error."""
- def __init__(self, video_id: str):
- """
- :param str video_id:
- A YouTube video identifier.
- """
- self.video_id = video_id
- super().__init__(self.error_string)
-
- @property
- def error_string(self):
- return f'{self.video_id} is unavailable'
-
-
-class AgeRestrictedError(VideoUnavailable):
- """Video is age restricted, and cannot be accessed without OAuth."""
- def __init__(self, video_id: str):
- """
- :param str video_id:
- A YouTube video identifier.
- """
- self.video_id = video_id
- super().__init__(self.video_id)
-
- @property
- def error_string(self):
- return f"{self.video_id} is age restricted, and can't be accessed without logging in."
-
-
-class LiveStreamError(VideoUnavailable):
- """Video is a live stream."""
- def __init__(self, video_id: str):
- """
- :param str video_id:
- A YouTube video identifier.
- """
- self.video_id = video_id
- super().__init__(self.video_id)
-
- @property
- def error_string(self):
- return f'{self.video_id} is streaming live and cannot be loaded'
-
-
-class VideoPrivate(VideoUnavailable):
- def __init__(self, video_id: str):
- """
- :param str video_id:
- A YouTube video identifier.
- """
- self.video_id = video_id
- super().__init__(self.video_id)
-
- @property
- def error_string(self):
- return f'{self.video_id} is a private video'
-
-
-class RecordingUnavailable(VideoUnavailable):
- def __init__(self, video_id: str):
- """
- :param str video_id:
- A YouTube video identifier.
- """
- self.video_id = video_id
- super().__init__(self.video_id)
-
- @property
- def error_string(self):
- return f'{self.video_id} does not have a live stream recording available'
-
-
-class MembersOnly(VideoUnavailable):
- """Video is members-only.
-
- YouTube has special videos that are only viewable to users who have
- subscribed to a content creator.
- ref: https://support.google.com/youtube/answer/7544492?hl=en
- """
- def __init__(self, video_id: str):
- """
- :param str video_id:
- A YouTube video identifier.
- """
- self.video_id = video_id
- super().__init__(self.video_id)
-
- @property
- def error_string(self):
- return f'{self.video_id} is a members-only video'
-
-
-class VideoRegionBlocked(VideoUnavailable):
- def __init__(self, video_id: str):
- """
- :param str video_id:
- A YouTube video identifier.
- """
- self.video_id = video_id
- super().__init__(self.video_id)
-
- @property
- def error_string(self):
- return f'{self.video_id} is not available in your region'
diff --git a/lib/pytube/pytube/extract.py b/lib/pytube/pytube/extract.py
deleted file mode 100644
index d083214..0000000
--- a/lib/pytube/pytube/extract.py
+++ /dev/null
@@ -1,579 +0,0 @@
-"""This module contains all non-cipher related data extraction logic."""
-import logging
-import urllib.parse
-import re
-from collections import OrderedDict
-from datetime import datetime
-from typing import Any, Dict, List, Optional, Tuple
-from urllib.parse import parse_qs, quote, urlencode, urlparse
-
-from pytube.cipher import Cipher
-from pytube.exceptions import HTMLParseError, LiveStreamError, RegexMatchError
-from pytube.helpers import regex_search
-from pytube.metadata import YouTubeMetadata
-from pytube.parser import parse_for_object, parse_for_all_objects
-
-
-logger = logging.getLogger(__name__)
-
-
-def publish_date(watch_html: str):
- """Extract publish date
- :param str watch_html:
- The html contents of the watch page.
- :rtype: str
- :returns:
- Publish date of the video.
- """
- try:
- result = regex_search(
- r"(?<=itemprop=\"datePublished\" content=\")\d{4}-\d{2}-\d{2}",
- watch_html, group=0
- )
- except RegexMatchError:
- return None
- return datetime.strptime(result, '%Y-%m-%d')
-
-
-def recording_available(watch_html):
- """Check if live stream recording is available.
-
- :param str watch_html:
- The html contents of the watch page.
- :rtype: bool
- :returns:
- Whether or not the content is private.
- """
- unavailable_strings = [
- 'This live stream recording is not available.'
- ]
- for string in unavailable_strings:
- if string in watch_html:
- return False
- return True
-
-
-def is_private(watch_html):
- """Check if content is private.
-
- :param str watch_html:
- The html contents of the watch page.
- :rtype: bool
- :returns:
- Whether or not the content is private.
- """
- private_strings = [
- "This is a private video. Please sign in to verify that you may see it.",
- "\"simpleText\":\"Private video\"",
- "This video is private."
- ]
- for string in private_strings:
- if string in watch_html:
- return True
- return False
-
-
-def is_age_restricted(watch_html: str) -> bool:
- """Check if content is age restricted.
-
- :param str watch_html:
- The html contents of the watch page.
- :rtype: bool
- :returns:
- Whether or not the content is age restricted.
- """
- try:
- regex_search(r"og:restrictions:age", watch_html, group=0)
- except RegexMatchError:
- return False
- return True
-
-
-def playability_status(watch_html: str) -> (str, str):
- """Return the playability status and status explanation of a video.
-
- For example, a video may have a status of LOGIN_REQUIRED, and an explanation
- of "This is a private video. Please sign in to verify that you may see it."
-
- This explanation is what gets incorporated into the media player overlay.
-
- :param str watch_html:
- The html contents of the watch page.
- :rtype: bool
- :returns:
- Playability status and reason of the video.
- """
- player_response = initial_player_response(watch_html)
- status_dict = player_response.get('playabilityStatus', {})
- if 'liveStreamability' in status_dict:
- return 'LIVE_STREAM', 'Video is a live stream.'
- if 'status' in status_dict:
- if 'reason' in status_dict:
- return status_dict['status'], [status_dict['reason']]
- if 'messages' in status_dict:
- return status_dict['status'], status_dict['messages']
- return None, [None]
-
-
-def video_id(url: str) -> str:
- """Extract the ``video_id`` from a YouTube url.
-
- This function supports the following patterns:
-
- - :samp:`https://youtube.com/watch?v={video_id}`
- - :samp:`https://youtube.com/embed/{video_id}`
- - :samp:`https://youtu.be/{video_id}`
-
- :param str url:
- A YouTube url containing a video id.
- :rtype: str
- :returns:
- YouTube video id.
- """
- return regex_search(r"(?:v=|\/)([0-9A-Za-z_-]{11}).*", url, group=1)
-
-
-def playlist_id(url: str) -> str:
- """Extract the ``playlist_id`` from a YouTube url.
-
- This function supports the following patterns:
-
- - :samp:`https://youtube.com/playlist?list={playlist_id}`
- - :samp:`https://youtube.com/watch?v={video_id}&list={playlist_id}`
-
- :param str url:
- A YouTube url containing a playlist id.
- :rtype: str
- :returns:
- YouTube playlist id.
- """
- parsed = urllib.parse.urlparse(url)
- return parse_qs(parsed.query)['list'][0]
-
-
-def channel_name(url: str) -> str:
- """Extract the ``channel_name`` or ``channel_id`` from a YouTube url.
-
- This function supports the following patterns:
-
- - :samp:`https://youtube.com/c/{channel_name}/*`
- - :samp:`https://youtube.com/channel/{channel_id}/*
- - :samp:`https://youtube.com/u/{channel_name}/*`
- - :samp:`https://youtube.com/user/{channel_id}/*
-
- :param str url:
- A YouTube url containing a channel name.
- :rtype: str
- :returns:
- YouTube channel name.
- """
- patterns = [
- r"(?:\/(c)\/([%\d\w_\-]+)(\/.*)?)",
- r"(?:\/(channel)\/([%\w\d_\-]+)(\/.*)?)",
- r"(?:\/(u)\/([%\d\w_\-]+)(\/.*)?)",
- r"(?:\/(user)\/([%\w\d_\-]+)(\/.*)?)"
- ]
- for pattern in patterns:
- regex = re.compile(pattern)
- function_match = regex.search(url)
- if function_match:
- logger.debug("finished regex search, matched: %s", pattern)
- uri_style = function_match.group(1)
- uri_identifier = function_match.group(2)
- return f'/{uri_style}/{uri_identifier}'
-
- raise RegexMatchError(
- caller="channel_name", pattern="patterns"
- )
-
-
-def video_info_url(video_id: str, watch_url: str) -> str:
- """Construct the video_info url.
-
- :param str video_id:
- A YouTube video identifier.
- :param str watch_url:
- A YouTube watch url.
- :rtype: str
- :returns:
- :samp:`https://youtube.com/get_video_info` with necessary GET
- parameters.
- """
- params = OrderedDict(
- [
- ("video_id", video_id),
- ("ps", "default"),
- ("eurl", quote(watch_url)),
- ("hl", "en_US"),
- ("html5", "1"),
- ("c", "TVHTML5"),
- ("cver", "7.20201028"),
- ]
- )
- return _video_info_url(params)
-
-
-def video_info_url_age_restricted(video_id: str, embed_html: str) -> str:
- """Construct the video_info url.
-
- :param str video_id:
- A YouTube video identifier.
- :param str embed_html:
- The html contents of the embed page (for age restricted videos).
- :rtype: str
- :returns:
- :samp:`https://youtube.com/get_video_info` with necessary GET
- parameters.
- """
- try:
- sts = regex_search(r'"sts"\s*:\s*(\d+)', embed_html, group=1)
- except RegexMatchError:
- sts = ""
- # Here we use ``OrderedDict`` so that the output is consistent between
- # Python 2.7+.
- eurl = f"https://youtube.googleapis.com/v/{video_id}"
- params = OrderedDict(
- [
- ("video_id", video_id),
- ("eurl", eurl),
- ("sts", sts),
- ("html5", "1"),
- ("c", "TVHTML5"),
- ("cver", "7.20201028"),
- ]
- )
- return _video_info_url(params)
-
-
-def _video_info_url(params: OrderedDict) -> str:
- return "https://www.youtube.com/get_video_info?" + urlencode(params)
-
-
-def js_url(html: str) -> str:
- """Get the base JavaScript url.
-
- Construct the base JavaScript url, which contains the decipher
- "transforms".
-
- :param str html:
- The html contents of the watch page.
- """
- try:
- base_js = get_ytplayer_config(html)['assets']['js']
- except (KeyError, RegexMatchError):
- base_js = get_ytplayer_js(html)
- return "https://youtube.com" + base_js
-
-
-def mime_type_codec(mime_type_codec: str) -> Tuple[str, List[str]]:
- """Parse the type data.
-
- Breaks up the data in the ``type`` key of the manifest, which contains the
- mime type and codecs serialized together, and splits them into separate
- elements.
-
- **Example**:
-
- mime_type_codec('audio/webm; codecs="opus"') -> ('audio/webm', ['opus'])
-
- :param str mime_type_codec:
- String containing mime type and codecs.
- :rtype: tuple
- :returns:
- The mime type and a list of codecs.
-
- """
- pattern = r"(\w+\/\w+)\;\scodecs=\"([a-zA-Z-0-9.,\s]*)\""
- regex = re.compile(pattern)
- results = regex.search(mime_type_codec)
- if not results:
- raise RegexMatchError(caller="mime_type_codec", pattern=pattern)
- mime_type, codecs = results.groups()
- return mime_type, [c.strip() for c in codecs.split(",")]
-
-
-def get_ytplayer_js(html: str) -> Any:
- """Get the YouTube player base JavaScript path.
-
- :param str html
- The html contents of the watch page.
- :rtype: str
- :returns:
- Path to YouTube's base.js file.
- """
- js_url_patterns = [
- r"(/s/player/[\w\d]+/[\w\d_/.]+/base\.js)"
- ]
- for pattern in js_url_patterns:
- regex = re.compile(pattern)
- function_match = regex.search(html)
- if function_match:
- logger.debug("finished regex search, matched: %s", pattern)
- yt_player_js = function_match.group(1)
- return yt_player_js
-
- raise RegexMatchError(
- caller="get_ytplayer_js", pattern="js_url_patterns"
- )
-
-
-def get_ytplayer_config(html: str) -> Any:
- """Get the YouTube player configuration data from the watch html.
-
- Extract the ``ytplayer_config``, which is json data embedded within the
- watch html and serves as the primary source of obtaining the stream
- manifest data.
-
- :param str html:
- The html contents of the watch page.
- :rtype: str
- :returns:
- Substring of the html containing the encoded manifest data.
- """
- logger.debug("finding initial function name")
- config_patterns = [
- r"ytplayer\.config\s*=\s*",
- r"ytInitialPlayerResponse\s*=\s*"
- ]
- for pattern in config_patterns:
- # Try each pattern consecutively if they don't find a match
- try:
- return parse_for_object(html, pattern)
- except HTMLParseError as e:
- logger.debug(f'Pattern failed: {pattern}')
- logger.debug(e)
- continue
-
- # setConfig() needs to be handled a little differently.
- # We want to parse the entire argument to setConfig()
- # and use then load that as json to find PLAYER_CONFIG
- # inside of it.
- setconfig_patterns = [
- r"yt\.setConfig\(.*['\"]PLAYER_CONFIG['\"]:\s*"
- ]
- for pattern in setconfig_patterns:
- # Try each pattern consecutively if they don't find a match
- try:
- return parse_for_object(html, pattern)
- except HTMLParseError:
- continue
-
- raise RegexMatchError(
- caller="get_ytplayer_config", pattern="config_patterns, setconfig_patterns"
- )
-
-
-def get_ytcfg(html: str) -> str:
- """Get the entirety of the ytcfg object.
-
- This is built over multiple pieces, so we have to find all matches and
- combine the dicts together.
-
- :param str html:
- The html contents of the watch page.
- :rtype: str
- :returns:
- Substring of the html containing the encoded manifest data.
- """
- ytcfg = {}
- ytcfg_patterns = [
- r"ytcfg\s=\s",
- r"ytcfg\.set\("
- ]
- for pattern in ytcfg_patterns:
- # Try each pattern consecutively and try to build a cohesive object
- try:
- found_objects = parse_for_all_objects(html, pattern)
- for obj in found_objects:
- ytcfg.update(obj)
- except HTMLParseError:
- continue
-
- if len(ytcfg) > 0:
- return ytcfg
-
- raise RegexMatchError(
- caller="get_ytcfg", pattern="ytcfg_pattenrs"
- )
-
-
-def apply_signature(stream_manifest: Dict, vid_info: Dict, js: str) -> None:
- """Apply the decrypted signature to the stream manifest.
-
- :param dict stream_manifest:
- Details of the media streams available.
- :param str js:
- The contents of the base.js asset file.
-
- """
- cipher = Cipher(js=js)
-
- for i, stream in enumerate(stream_manifest):
- try:
- url: str = stream["url"]
- except KeyError:
- live_stream = (
- vid_info.get("playabilityStatus", {},)
- .get("liveStreamability")
- )
- if live_stream:
- raise LiveStreamError("UNKNOWN")
- # 403 Forbidden fix.
- if "signature" in url or (
- "s" not in stream and ("&sig=" in url or "&lsig=" in url)
- ):
- # For certain videos, YouTube will just provide them pre-signed, in
- # which case there's no real magic to download them and we can skip
- # the whole signature descrambling entirely.
- logger.debug("signature found, skip decipher")
- continue
-
- signature = cipher.get_signature(ciphered_signature=stream["s"])
-
- logger.debug(
- "finished descrambling signature for itag=%s", stream["itag"]
- )
- parsed_url = urlparse(url)
-
- # Convert query params off url to dict
- query_params = parse_qs(urlparse(url).query)
- query_params = {
- k: v[0] for k,v in query_params.items()
- }
- query_params['sig'] = signature
- if 'ratebypass' not in query_params.keys():
- # Cipher n to get the updated value
-
- initial_n = list(query_params['n'])
- new_n = cipher.calculate_n(initial_n)
- query_params['n'] = new_n
-
- url = f'{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}?{urlencode(query_params)}' # noqa:E501
-
- # 403 forbidden fix
- stream_manifest[i]["url"] = url
-
-
-def apply_descrambler(stream_data: Dict) -> None:
- """Apply various in-place transforms to YouTube's media stream data.
-
- Creates a ``list`` of dictionaries by string splitting on commas, then
- taking each list item, parsing it as a query string, converting it to a
- ``dict`` and unquoting the value.
-
- :param dict stream_data:
- Dictionary containing query string encoded values.
-
- **Example**:
-
- >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
- >>> apply_descrambler(d, 'foo')
- >>> print(d)
- {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}
-
- """
- if 'url' in stream_data:
- return None
-
- # Merge formats and adaptiveFormats into a single list
- formats = []
- if 'formats' in stream_data.keys():
- formats.extend(stream_data['formats'])
- if 'adaptiveFormats' in stream_data.keys():
- formats.extend(stream_data['adaptiveFormats'])
-
- # Extract url and s from signatureCiphers as necessary
- for data in formats:
- if 'url' not in data:
- if 'signatureCipher' in data:
- cipher_url = parse_qs(data['signatureCipher'])
- data['url'] = cipher_url['url'][0]
- data['s'] = cipher_url['s'][0]
- data['is_otf'] = data.get('type') == 'FORMAT_STREAM_TYPE_OTF'
-
- logger.debug("applying descrambler")
- return formats
-
-
-def initial_data(watch_html: str) -> str:
- """Extract the ytInitialData json from the watch_html page.
-
- This mostly contains metadata necessary for rendering the page on-load,
- such as video information, copyright notices, etc.
-
- @param watch_html: Html of the watch page
- @return:
- """
- patterns = [
- r"window\[['\"]ytInitialData['\"]]\s*=\s*",
- r"ytInitialData\s*=\s*"
- ]
- for pattern in patterns:
- try:
- return parse_for_object(watch_html, pattern)
- except HTMLParseError:
- pass
-
- raise RegexMatchError(caller='initial_data', pattern='initial_data_pattern')
-
-
-def initial_player_response(watch_html: str) -> str:
- """Extract the ytInitialPlayerResponse json from the watch_html page.
-
- This mostly contains metadata necessary for rendering the page on-load,
- such as video information, copyright notices, etc.
-
- @param watch_html: Html of the watch page
- @return:
- """
- patterns = [
- r"window\[['\"]ytInitialPlayerResponse['\"]]\s*=\s*",
- r"ytInitialPlayerResponse\s*=\s*"
- ]
- for pattern in patterns:
- try:
- return parse_for_object(watch_html, pattern)
- except HTMLParseError:
- pass
-
- raise RegexMatchError(
- caller='initial_player_response',
- pattern='initial_player_response_pattern'
- )
-
-
-def metadata(initial_data) -> Optional[YouTubeMetadata]:
- """Get the informational metadata for the video.
-
- e.g.:
- [
- {
- 'Song': '강남스타일(Gangnam Style)',
- 'Artist': 'PSY',
- 'Album': 'PSY SIX RULES Pt.1',
- 'Licensed to YouTube by': 'YG Entertainment Inc. [...]'
- }
- ]
-
- :rtype: YouTubeMetadata
- """
- try:
- metadata_rows: List = initial_data["contents"]["twoColumnWatchNextResults"][
- "results"]["results"]["contents"][1]["videoSecondaryInfoRenderer"][
- "metadataRowContainer"]["metadataRowContainerRenderer"]["rows"]
- except (KeyError, IndexError):
- # If there's an exception accessing this data, it probably doesn't exist.
- return YouTubeMetadata([])
-
- # Rows appear to only have "metadataRowRenderer" or "metadataRowHeaderRenderer"
- # and we only care about the former, so we filter the others
- metadata_rows = filter(
- lambda x: "metadataRowRenderer" in x.keys(),
- metadata_rows
- )
-
- # We then access the metadataRowRenderer key in each element
- # and build a metadata object from this new list
- metadata_rows = [x["metadataRowRenderer"] for x in metadata_rows]
-
- return YouTubeMetadata(metadata_rows)
diff --git a/lib/pytube/pytube/helpers.py b/lib/pytube/pytube/helpers.py
deleted file mode 100644
index 4cf02eb..0000000
--- a/lib/pytube/pytube/helpers.py
+++ /dev/null
@@ -1,335 +0,0 @@
-"""Various helper functions implemented by pytube."""
-import functools
-import gzip
-import json
-import logging
-import os
-import re
-import warnings
-from typing import Any, Callable, Dict, List, Optional, TypeVar
-from urllib import request
-
-from pytube.exceptions import RegexMatchError
-
-logger = logging.getLogger(__name__)
-
-
-class DeferredGeneratorList:
- """A wrapper class for deferring list generation.
-
- Pytube has some continuation generators that create web calls, which means
- that any time a full list is requested, all of those web calls must be
- made at once, which could lead to slowdowns. This will allow individual
- elements to be queried, so that slowdowns only happen as necessary. For
- example, you can iterate over elements in the list without accessing them
- all simultaneously. This should allow for speed improvements for playlist
- and channel interactions.
- """
- def __init__(self, generator):
- """Construct a :class:`DeferredGeneratorList `.
-
- :param generator generator:
- The deferrable generator to create a wrapper for.
- :param func func:
- (Optional) A function to call on the generator items to produce the list.
- """
- self.gen = generator
- self._elements = []
-
- def __eq__(self, other):
- """We want to mimic list behavior for comparison."""
- return list(self) == other
-
- def __getitem__(self, key) -> Any:
- """Only generate items as they're asked for."""
- # We only allow querying with indexes.
- if not isinstance(key, (int, slice)):
- raise TypeError('Key must be either a slice or int.')
-
- # Convert int keys to slice
- key_slice = key
- if isinstance(key, int):
- key_slice = slice(key, key + 1, 1)
-
- # Generate all elements up to the final item
- while len(self._elements) < key_slice.stop:
- try:
- next_item = next(self.gen)
- except StopIteration:
- # If we can't find enough elements for the slice, raise an IndexError
- raise IndexError
- else:
- self._elements.append(next_item)
-
- return self._elements[key]
-
- def __iter__(self):
- """Custom iterator for dynamically generated list."""
- iter_index = 0
- while True:
- try:
- curr_item = self[iter_index]
- except IndexError:
- return
- else:
- yield curr_item
- iter_index += 1
-
- def __next__(self) -> Any:
- """Fetch next element in iterator."""
- try:
- curr_element = self[self.iter_index]
- except IndexError:
- raise StopIteration
- self.iter_index += 1
- return curr_element # noqa:R504
-
- def __len__(self) -> int:
- """Return length of list of all items."""
- self.generate_all()
- return len(self._elements)
-
- def __repr__(self) -> str:
- """String representation of all items."""
- self.generate_all()
- return str(self._elements)
-
- def __reversed__(self):
- self.generate_all()
- return self._elements[::-1]
-
- def generate_all(self):
- """Generate all items."""
- while True:
- try:
- next_item = next(self.gen)
- except StopIteration:
- break
- else:
- self._elements.append(next_item)
-
-
-def regex_search(pattern: str, string: str, group: int) -> str:
- """Shortcut method to search a string for a given pattern.
-
- :param str pattern:
- A regular expression pattern.
- :param str string:
- A target string to search.
- :param int group:
- Index of group to return.
- :rtype:
- str or tuple
- :returns:
- Substring pattern matches.
- """
- regex = re.compile(pattern)
- results = regex.search(string)
- if not results:
- raise RegexMatchError(caller="regex_search", pattern=pattern)
-
- logger.debug("matched regex search: %s", pattern)
-
- return results.group(group)
-
-
-def safe_filename(s: str, max_length: int = 255) -> str:
- """Sanitize a string making it safe to use as a filename.
-
- This function was based off the limitations outlined here:
- https://en.wikipedia.org/wiki/Filename.
-
- :param str s:
- A string to make safe for use as a file name.
- :param int max_length:
- The maximum filename character length.
- :rtype: str
- :returns:
- A sanitized string.
- """
- # Characters in range 0-31 (0x00-0x1F) are not allowed in ntfs filenames.
- ntfs_characters = [chr(i) for i in range(0, 31)]
- characters = [
- r'"',
- r"\#",
- r"\$",
- r"\%",
- r"'",
- r"\*",
- r"\,",
- r"\.",
- r"\/",
- r"\:",
- r'"',
- r"\;",
- r"\<",
- r"\>",
- r"\?",
- r"\\",
- r"\^",
- r"\|",
- r"\~",
- r"\\\\",
- ]
- pattern = "|".join(ntfs_characters + characters)
- regex = re.compile(pattern, re.UNICODE)
- filename = regex.sub("", s)
- return filename[:max_length].rsplit(" ", 0)[0]
-
-
-def setup_logger(level: int = logging.ERROR, log_filename: Optional[str] = None) -> None:
- """Create a configured instance of logger.
-
- :param int level:
- Describe the severity level of the logs to handle.
- """
- fmt = "[%(asctime)s] %(levelname)s in %(module)s: %(message)s"
- date_fmt = "%H:%M:%S"
- formatter = logging.Formatter(fmt, datefmt=date_fmt)
-
- # https://github.com/pytube/pytube/issues/163
- logger = logging.getLogger("pytube")
- logger.setLevel(level)
-
- stream_handler = logging.StreamHandler()
- stream_handler.setFormatter(formatter)
- logger.addHandler(stream_handler)
-
- if log_filename is not None:
- file_handler = logging.FileHandler(log_filename)
- file_handler.setFormatter(formatter)
- logger.addHandler(file_handler)
-
-
-GenericType = TypeVar("GenericType")
-
-
-def cache(func: Callable[..., GenericType]) -> GenericType:
- """ mypy compatible annotation wrapper for lru_cache"""
- return functools.lru_cache()(func) # type: ignore
-
-
-def deprecated(reason: str) -> Callable:
- """
- This is a decorator which can be used to mark functions
- as deprecated. It will result in a warning being emitted
- when the function is used.
- """
-
- def decorator(func1):
- message = "Call to deprecated function {name} ({reason})."
-
- @functools.wraps(func1)
- def new_func1(*args, **kwargs):
- warnings.simplefilter("always", DeprecationWarning)
- warnings.warn(
- message.format(name=func1.__name__, reason=reason),
- category=DeprecationWarning,
- stacklevel=2,
- )
- warnings.simplefilter("default", DeprecationWarning)
- return func1(*args, **kwargs)
-
- return new_func1
-
- return decorator
-
-
-def target_directory(output_path: Optional[str] = None) -> str:
- """
- Function for determining target directory of a download.
- Returns an absolute path (if relative one given) or the current
- path (if none given). Makes directory if it does not exist.
-
- :type output_path: str
- :rtype: str
- :returns:
- An absolute directory path as a string.
- """
- if output_path:
- if not os.path.isabs(output_path):
- output_path = os.path.join(os.getcwd(), output_path)
- else:
- output_path = os.getcwd()
- os.makedirs(output_path, exist_ok=True)
- return output_path
-
-
-def install_proxy(proxy_handler: Dict[str, str]) -> None:
- proxy_support = request.ProxyHandler(proxy_handler)
- opener = request.build_opener(proxy_support)
- request.install_opener(opener)
-
-
-def uniqueify(duped_list: List) -> List:
- """Remove duplicate items from a list, while maintaining list order.
-
- :param List duped_list
- List to remove duplicates from
-
- :return List result
- De-duplicated list
- """
- seen: Dict[Any, bool] = {}
- result = []
- for item in duped_list:
- if item in seen:
- continue
- seen[item] = True
- result.append(item)
- return result
-
-
-def generate_all_html_json_mocks():
- """Regenerate the video mock json files for all current test videos.
-
- This should automatically output to the test/mocks directory.
- """
- test_vid_ids = [
- '2lAe1cqCOXo',
- '5YceQ8YqYMc',
- 'irauhITDrsE',
- 'm8uHb5jIGN8',
- 'QRS8MkLhQmM',
- 'WXxV9g7lsFE'
- ]
- for vid_id in test_vid_ids:
- create_mock_html_json(vid_id)
-
-
-def create_mock_html_json(vid_id) -> Dict[str, Any]:
- """Generate a json.gz file with sample html responses.
-
- :param str vid_id
- YouTube video id
-
- :return dict data
- Dict used to generate the json.gz file
- """
- from pytube import YouTube
- gzip_filename = 'yt-video-%s-html.json.gz' % vid_id
-
- # Get the pytube directory in order to navigate to /tests/mocks
- pytube_dir_path = os.path.abspath(
- os.path.join(
- os.path.dirname(__file__),
- os.path.pardir
- )
- )
- pytube_mocks_path = os.path.join(pytube_dir_path, 'tests', 'mocks')
- gzip_filepath = os.path.join(pytube_mocks_path, gzip_filename)
-
- yt = YouTube(f'https://www.youtube.com/watch?v={vid_id}')
- html_data = {
- 'url': yt.watch_url,
- 'js': yt.js,
- 'embed_html': yt.embed_html,
- 'watch_html': yt.watch_html,
- 'vid_info': yt.vid_info
- }
-
- logger.info(f'Outputing json.gz file to {gzip_filepath}')
- with gzip.open(gzip_filepath, 'wb') as f:
- f.write(json.dumps(html_data).encode('utf-8'))
-
- return html_data
diff --git a/lib/pytube/pytube/innertube.py b/lib/pytube/pytube/innertube.py
deleted file mode 100644
index f1af0f6..0000000
--- a/lib/pytube/pytube/innertube.py
+++ /dev/null
@@ -1,507 +0,0 @@
-"""This module is designed to interact with the innertube API.
-
-This module is NOT intended to be used directly by end users, as each of the
-interfaces returns raw results. These should instead be parsed to extract
-the useful information for the end user.
-"""
-# Native python imports
-import json
-import os
-import pathlib
-import time
-from urllib import parse
-
-# Local imports
-from pytube import request
-
-# YouTube on TV client secrets
-_client_id = '861556708454-d6dlm3lh05idd8npek18k6be8ba3oc68.apps.googleusercontent.com'
-_client_secret = 'SboVhoG9s0rNafixCSGGKXAT'
-
-# Extracted API keys -- unclear what these are linked to.
-_api_keys = [
- 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
- 'AIzaSyCtkvNIR1HCEwzsqK6JuE6KqpyjusIRI30',
- 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
- 'AIzaSyC8UYZpvA2eknNex0Pjid0_eTLJoDu6los',
- 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
- 'AIzaSyDHQ9ipnphqTzDqZsbtd8_Ru4_kiKVQe2k'
-]
-
-_default_clients = {
- 'WEB': {
- 'context': {
- 'client': {
- 'clientName': 'WEB',
- 'clientVersion': '2.20200720.00.02'
- }
- },
- 'header': {
- 'User-Agent': 'Mozilla/5.0'
- },
- 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
- },
- 'ANDROID': {
- 'context': {
- 'client': {
- 'clientName': 'ANDROID',
- 'clientVersion': '17.31.35',
- 'androidSdkVersion': 30
- }
- },
- 'header': {
- 'User-Agent': 'com.google.android.youtube/',
- },
- 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
- },
- 'IOS': {
- 'context': {
- 'client': {
- 'clientName': 'IOS',
- 'clientVersion': '17.33.2',
- 'deviceModel': 'iPhone14,3'
- }
- },
- 'header': {
- 'User-Agent': 'com.google.ios.youtube/'
- },
- 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
- },
-
- 'WEB_EMBED': {
- 'context': {
- 'client': {
- 'clientName': 'WEB_EMBEDDED_PLAYER',
- 'clientVersion': '2.20210721.00.00',
- 'clientScreen': 'EMBED'
- }
- },
- 'header': {
- 'User-Agent': 'Mozilla/5.0'
- },
- 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
- },
- 'ANDROID_EMBED': {
- 'context': {
- 'client': {
- 'clientName': 'ANDROID_EMBEDDED_PLAYER',
- 'clientVersion': '17.31.35',
- 'clientScreen': 'EMBED',
- 'androidSdkVersion': 30,
- }
- },
- 'header': {
- 'User-Agent': 'com.google.android.youtube/'
- },
- 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
- },
- 'IOS_EMBED': {
- 'context': {
- 'client': {
- 'clientName': 'IOS_MESSAGES_EXTENSION',
- 'clientVersion': '17.33.2',
- 'deviceModel': 'iPhone14,3'
- }
- },
- 'header': {
- 'User-Agent': 'com.google.ios.youtube/'
- },
- 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
- },
-
- 'WEB_MUSIC': {
- 'context': {
- 'client': {
- 'clientName': 'WEB_REMIX',
- 'clientVersion': '1.20220727.01.00',
- }
- },
- 'header': {
- 'User-Agent': 'Mozilla/5.0'
- },
- 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
- },
- 'ANDROID_MUSIC': {
- 'context': {
- 'client': {
- 'clientName': 'ANDROID_MUSIC',
- 'clientVersion': '5.16.51',
- 'androidSdkVersion': 30
- }
- },
- 'header': {
- 'User-Agent': 'com.google.android.apps.youtube.music/'
- },
- 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
- },
- 'IOS_MUSIC': {
- 'context': {
- 'client': {
- 'clientName': 'IOS_MUSIC',
- 'clientVersion': '5.21',
- 'deviceModel': 'iPhone14,3'
- }
- },
- 'header': {
- 'User-Agent': 'com.google.ios.youtubemusic/'
- },
- 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
- },
-
- 'WEB_CREATOR': {
- 'context': {
- 'client': {
- 'clientName': 'WEB_CREATOR',
- 'clientVersion': '1.20220726.00.00',
- }
- },
- 'header': {
- 'User-Agent': 'Mozilla/5.0'
- },
- 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
- },
- 'ANDROID_CREATOR': {
- 'context': {
- 'client': {
- 'clientName': 'ANDROID_CREATOR',
- 'clientVersion': '22.30.100',
- 'androidSdkVersion': 30,
- }
- },
- 'header': {
- 'User-Agent': 'com.google.android.apps.youtube.creator/',
- },
- 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
- },
- 'IOS_CREATOR': {
- 'context': {
- 'client': {
- 'clientName': 'IOS_CREATOR',
- 'clientVersion': '22.33.101',
- 'deviceModel': 'iPhone14,3',
- }
- },
- 'header': {
- 'User-Agent': 'com.google.ios.ytcreator/'
- },
- 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
- },
-
- 'MWEB': {
- 'context': {
- 'client': {
- 'clientName': 'MWEB',
- 'clientVersion': '2.20220801.00.00',
- }
- },
- 'header': {
- 'User-Agent': 'Mozilla/5.0'
- },
- 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
- },
-
- 'TV_EMBED': {
- 'context': {
- 'client': {
- 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
- 'clientVersion': '2.0',
- }
- },
- 'header': {
- 'User-Agent': 'Mozilla/5.0'
- },
- 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
- },
-}
-_token_timeout = 1800
-_cache_dir = pathlib.Path(__file__).parent.resolve() / '__cache__'
-_token_file = os.path.join(_cache_dir, 'tokens.json')
-
-
-class InnerTube:
- """Object for interacting with the innertube API."""
- def __init__(self, client='ANDROID_MUSIC', use_oauth=False, allow_cache=True):
- """Initialize an InnerTube object.
-
- :param str client:
- Client to use for the object.
- Default to web because it returns the most playback types.
- :param bool use_oauth:
- Whether or not to authenticate to YouTube.
- :param bool allow_cache:
- Allows caching of oauth tokens on the machine.
- """
- self.context = _default_clients[client]['context']
- self.header = _default_clients[client]['header']
- self.api_key = _default_clients[client]['api_key']
- self.access_token = None
- self.refresh_token = None
- self.use_oauth = use_oauth
- self.allow_cache = allow_cache
-
- # Stored as epoch time
- self.expires = None
-
- # Try to load from file if specified
- if self.use_oauth and self.allow_cache:
- # Try to load from file if possible
- if os.path.exists(_token_file):
- with open(_token_file) as f:
- data = json.load(f)
- self.access_token = data['access_token']
- self.refresh_token = data['refresh_token']
- self.expires = data['expires']
- self.refresh_bearer_token()
-
- def cache_tokens(self):
- """Cache tokens to file if allowed."""
- if not self.allow_cache:
- return
-
- data = {
- 'access_token': self.access_token,
- 'refresh_token': self.refresh_token,
- 'expires': self.expires
- }
- if not os.path.exists(_cache_dir):
- os.mkdir(_cache_dir)
- with open(_token_file, 'w') as f:
- json.dump(data, f)
-
- def refresh_bearer_token(self, force=False):
- """Refreshes the OAuth token if necessary.
-
- :param bool force:
- Force-refresh the bearer token.
- """
- if not self.use_oauth:
- return
- # Skip refresh if it's not necessary and not forced
- if self.expires > time.time() and not force:
- return
-
- # Subtracting 30 seconds is arbitrary to avoid potential time discrepencies
- start_time = int(time.time() - 30)
- data = {
- 'client_id': _client_id,
- 'client_secret': _client_secret,
- 'grant_type': 'refresh_token',
- 'refresh_token': self.refresh_token
- }
- response = request._execute_request(
- 'https://oauth2.googleapis.com/token',
- 'POST',
- headers={
- 'Content-Type': 'application/json'
- },
- data=data
- )
- response_data = json.loads(response.read())
-
- self.access_token = response_data['access_token']
- self.expires = start_time + response_data['expires_in']
- self.cache_tokens()
-
- def fetch_bearer_token(self):
- """Fetch an OAuth token."""
- # Subtracting 30 seconds is arbitrary to avoid potential time discrepencies
- start_time = int(time.time() - 30)
- data = {
- 'client_id': _client_id,
- 'scope': 'https://www.googleapis.com/auth/youtube'
- }
- response = request._execute_request(
- 'https://oauth2.googleapis.com/device/code',
- 'POST',
- headers={
- 'Content-Type': 'application/json'
- },
- data=data
- )
- response_data = json.loads(response.read())
- verification_url = response_data['verification_url']
- user_code = response_data['user_code']
- print(f'Please open {verification_url} and input code {user_code}')
- input('Press enter when you have completed this step.')
-
- data = {
- 'client_id': _client_id,
- 'client_secret': _client_secret,
- 'device_code': response_data['device_code'],
- 'grant_type': 'urn:ietf:params:oauth:grant-type:device_code'
- }
- response = request._execute_request(
- 'https://oauth2.googleapis.com/token',
- 'POST',
- headers={
- 'Content-Type': 'application/json'
- },
- data=data
- )
- response_data = json.loads(response.read())
-
- self.access_token = response_data['access_token']
- self.refresh_token = response_data['refresh_token']
- self.expires = start_time + response_data['expires_in']
- self.cache_tokens()
-
- @property
- def base_url(self):
- """Return the base url endpoint for the innertube API."""
- return 'https://www.youtube.com/youtubei/v1'
-
- @property
- def base_data(self):
- """Return the base json data to transmit to the innertube API."""
- return {
- 'context': self.context
- }
-
- @property
- def base_params(self):
- """Return the base query parameters to transmit to the innertube API."""
- return {
- 'key': self.api_key,
- 'contentCheckOk': True,
- 'racyCheckOk': True
- }
-
- def _call_api(self, endpoint, query, data):
- """Make a request to a given endpoint with the provided query parameters and data."""
- # Remove the API key if oauth is being used.
- if self.use_oauth:
- del query['key']
-
- endpoint_url = f'{endpoint}?{parse.urlencode(query)}'
- headers = {
- 'Content-Type': 'application/json',
- }
- # Add the bearer token if applicable
- if self.use_oauth:
- if self.access_token:
- self.refresh_bearer_token()
- headers['Authorization'] = f'Bearer {self.access_token}'
- else:
- self.fetch_bearer_token()
- headers['Authorization'] = f'Bearer {self.access_token}'
-
- headers.update(self.header)
-
- response = request._execute_request(
- endpoint_url,
- 'POST',
- headers=headers,
- data=data
- )
- return json.loads(response.read())
-
- def browse(self):
- """Make a request to the browse endpoint.
-
- TODO: Figure out how we can use this
- """
- # endpoint = f'{self.base_url}/browse' # noqa:E800
- ...
- # return self._call_api(endpoint, query, self.base_data) # noqa:E800
-
- def config(self):
- """Make a request to the config endpoint.
-
- TODO: Figure out how we can use this
- """
- # endpoint = f'{self.base_url}/config' # noqa:E800
- ...
- # return self._call_api(endpoint, query, self.base_data) # noqa:E800
-
- def guide(self):
- """Make a request to the guide endpoint.
-
- TODO: Figure out how we can use this
- """
- # endpoint = f'{self.base_url}/guide' # noqa:E800
- ...
- # return self._call_api(endpoint, query, self.base_data) # noqa:E800
-
- def next(self):
- """Make a request to the next endpoint.
-
- TODO: Figure out how we can use this
- """
- # endpoint = f'{self.base_url}/next' # noqa:E800
- ...
- # return self._call_api(endpoint, query, self.base_data) # noqa:E800
-
- def player(self, video_id):
- """Make a request to the player endpoint.
-
- :param str video_id:
- The video id to get player info for.
- :rtype: dict
- :returns:
- Raw player info results.
- """
- endpoint = f'{self.base_url}/player'
- query = {
- 'videoId': video_id,
- }
- query.update(self.base_params)
- return self._call_api(endpoint, query, self.base_data)
-
- def search(self, search_query, continuation=None):
- """Make a request to the search endpoint.
-
- :param str search_query:
- The query to search.
- :rtype: dict
- :returns:
- Raw search query results.
- """
- endpoint = f'{self.base_url}/search'
- query = {
- 'query': search_query
- }
- query.update(self.base_params)
- data = {}
- if continuation:
- data['continuation'] = continuation
- data.update(self.base_data)
- return self._call_api(endpoint, query, data)
-
- def verify_age(self, video_id):
- """Make a request to the age_verify endpoint.
-
- Notable examples of the types of video this verification step is for:
- * https://www.youtube.com/watch?v=QLdAhwSBZ3w
- * https://www.youtube.com/watch?v=hc0ZDaAZQT0
-
- :param str video_id:
- The video id to get player info for.
- :rtype: dict
- :returns:
- Returns information that includes a URL for bypassing certain restrictions.
- """
- endpoint = f'{self.base_url}/verify_age'
- data = {
- 'nextEndpoint': {
- 'urlEndpoint': {
- 'url': f'/watch?v={video_id}'
- }
- },
- 'setControvercy': True
- }
- data.update(self.base_data)
- result = self._call_api(endpoint, self.base_params, data)
- return result
-
- def get_transcript(self, video_id):
- """Make a request to the get_transcript endpoint.
-
- This is likely related to captioning for videos, but is currently untested.
- """
- endpoint = f'{self.base_url}/get_transcript'
- query = {
- 'videoId': video_id,
- }
- query.update(self.base_params)
- result = self._call_api(endpoint, query, self.base_data)
- return result
diff --git a/lib/pytube/pytube/itags.py b/lib/pytube/pytube/itags.py
deleted file mode 100644
index 87536b1..0000000
--- a/lib/pytube/pytube/itags.py
+++ /dev/null
@@ -1,153 +0,0 @@
-"""This module contains a lookup table of YouTube's itag values."""
-from typing import Dict
-
-PROGRESSIVE_VIDEO = {
- 5: ("240p", "64kbps"),
- 6: ("270p", "64kbps"),
- 13: ("144p", None),
- 17: ("144p", "24kbps"),
- 18: ("360p", "96kbps"),
- 22: ("720p", "192kbps"),
- 34: ("360p", "128kbps"),
- 35: ("480p", "128kbps"),
- 36: ("240p", None),
- 37: ("1080p", "192kbps"),
- 38: ("3072p", "192kbps"),
- 43: ("360p", "128kbps"),
- 44: ("480p", "128kbps"),
- 45: ("720p", "192kbps"),
- 46: ("1080p", "192kbps"),
- 59: ("480p", "128kbps"),
- 78: ("480p", "128kbps"),
- 82: ("360p", "128kbps"),
- 83: ("480p", "128kbps"),
- 84: ("720p", "192kbps"),
- 85: ("1080p", "192kbps"),
- 91: ("144p", "48kbps"),
- 92: ("240p", "48kbps"),
- 93: ("360p", "128kbps"),
- 94: ("480p", "128kbps"),
- 95: ("720p", "256kbps"),
- 96: ("1080p", "256kbps"),
- 100: ("360p", "128kbps"),
- 101: ("480p", "192kbps"),
- 102: ("720p", "192kbps"),
- 132: ("240p", "48kbps"),
- 151: ("720p", "24kbps"),
- 300: ("720p", "128kbps"),
- 301: ("1080p", "128kbps"),
-}
-
-DASH_VIDEO = {
- # DASH Video
- 133: ("240p", None), # MP4
- 134: ("360p", None), # MP4
- 135: ("480p", None), # MP4
- 136: ("720p", None), # MP4
- 137: ("1080p", None), # MP4
- 138: ("2160p", None), # MP4
- 160: ("144p", None), # MP4
- 167: ("360p", None), # WEBM
- 168: ("480p", None), # WEBM
- 169: ("720p", None), # WEBM
- 170: ("1080p", None), # WEBM
- 212: ("480p", None), # MP4
- 218: ("480p", None), # WEBM
- 219: ("480p", None), # WEBM
- 242: ("240p", None), # WEBM
- 243: ("360p", None), # WEBM
- 244: ("480p", None), # WEBM
- 245: ("480p", None), # WEBM
- 246: ("480p", None), # WEBM
- 247: ("720p", None), # WEBM
- 248: ("1080p", None), # WEBM
- 264: ("1440p", None), # MP4
- 266: ("2160p", None), # MP4
- 271: ("1440p", None), # WEBM
- 272: ("4320p", None), # WEBM
- 278: ("144p", None), # WEBM
- 298: ("720p", None), # MP4
- 299: ("1080p", None), # MP4
- 302: ("720p", None), # WEBM
- 303: ("1080p", None), # WEBM
- 308: ("1440p", None), # WEBM
- 313: ("2160p", None), # WEBM
- 315: ("2160p", None), # WEBM
- 330: ("144p", None), # WEBM
- 331: ("240p", None), # WEBM
- 332: ("360p", None), # WEBM
- 333: ("480p", None), # WEBM
- 334: ("720p", None), # WEBM
- 335: ("1080p", None), # WEBM
- 336: ("1440p", None), # WEBM
- 337: ("2160p", None), # WEBM
- 394: ("144p", None), # MP4
- 395: ("240p", None), # MP4
- 396: ("360p", None), # MP4
- 397: ("480p", None), # MP4
- 398: ("720p", None), # MP4
- 399: ("1080p", None), # MP4
- 400: ("1440p", None), # MP4
- 401: ("2160p", None), # MP4
- 402: ("4320p", None), # MP4
- 571: ("4320p", None), # MP4
- 694: ("144p", None), # MP4
- 695: ("240p", None), # MP4
- 696: ("360p", None), # MP4
- 697: ("480p", None), # MP4
- 698: ("720p", None), # MP4
- 699: ("1080p", None), # MP4
- 700: ("1440p", None), # MP4
- 701: ("2160p", None), # MP4
- 702: ("4320p", None), # MP4
-}
-
-DASH_AUDIO = {
- # DASH Audio
- 139: (None, "48kbps"), # MP4
- 140: (None, "128kbps"), # MP4
- 141: (None, "256kbps"), # MP4
- 171: (None, "128kbps"), # WEBM
- 172: (None, "256kbps"), # WEBM
- 249: (None, "50kbps"), # WEBM
- 250: (None, "70kbps"), # WEBM
- 251: (None, "160kbps"), # WEBM
- 256: (None, "192kbps"), # MP4
- 258: (None, "384kbps"), # MP4
- 325: (None, None), # MP4
- 328: (None, None), # MP4
-}
-
-ITAGS = {
- **PROGRESSIVE_VIDEO,
- **DASH_VIDEO,
- **DASH_AUDIO,
-}
-
-HDR = [330, 331, 332, 333, 334, 335, 336, 337]
-_3D = [82, 83, 84, 85, 100, 101, 102]
-LIVE = [91, 92, 93, 94, 95, 96, 132, 151]
-
-
-def get_format_profile(itag: int) -> Dict:
- """Get additional format information for a given itag.
-
- :param str itag:
- YouTube format identifier code.
- """
- itag = int(itag)
- if itag in ITAGS:
- res, bitrate = ITAGS[itag]
- else:
- res, bitrate = None, None
- return {
- "resolution": res,
- "abr": bitrate,
- "is_live": itag in LIVE,
- "is_3d": itag in _3D,
- "is_hdr": itag in HDR,
- "is_dash": (
- itag in DASH_AUDIO
- or itag in DASH_VIDEO
- ),
- }
diff --git a/lib/pytube/pytube/metadata.py b/lib/pytube/pytube/metadata.py
deleted file mode 100644
index be12c63..0000000
--- a/lib/pytube/pytube/metadata.py
+++ /dev/null
@@ -1,48 +0,0 @@
-"""This module contains the YouTubeMetadata class."""
-import json
-from typing import Dict, List, Optional
-
-
-class YouTubeMetadata:
- def __init__(self, metadata: List):
- self._raw_metadata: List = metadata
- self._metadata = [{}]
-
- for el in metadata:
- # We only add metadata to the dict if it has a simpleText title.
- if 'title' in el and 'simpleText' in el['title']:
- metadata_title = el['title']['simpleText']
- else:
- continue
-
- contents = el['contents'][0]
- if 'simpleText' in contents:
- self._metadata[-1][metadata_title] = contents['simpleText']
- elif 'runs' in contents:
- self._metadata[-1][metadata_title] = contents['runs'][0]['text']
-
- # Upon reaching a dividing line, create a new grouping
- if el.get('hasDividerLine', False):
- self._metadata.append({})
-
- # If we happen to create an empty dict at the end, drop it
- if self._metadata[-1] == {}:
- self._metadata = self._metadata[:-1]
-
- def __getitem__(self, key):
- return self._metadata[key]
-
- def __iter__(self):
- for el in self._metadata:
- yield el
-
- def __str__(self):
- return json.dumps(self._metadata)
-
- @property
- def raw_metadata(self) -> Optional[Dict]:
- return self._raw_metadata
-
- @property
- def metadata(self):
- return self._metadata
diff --git a/lib/pytube/pytube/monostate.py b/lib/pytube/pytube/monostate.py
deleted file mode 100644
index 7968af5..0000000
--- a/lib/pytube/pytube/monostate.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from typing import Any, Callable, Optional
-
-
-class Monostate:
- def __init__(
- self,
- on_progress: Optional[Callable[[Any, bytes, int], None]],
- on_complete: Optional[Callable[[Any, Optional[str]], None]],
- title: Optional[str] = None,
- duration: Optional[int] = None,
- ):
- self.on_progress = on_progress
- self.on_complete = on_complete
- self.title = title
- self.duration = duration
diff --git a/lib/pytube/pytube/parser.py b/lib/pytube/pytube/parser.py
deleted file mode 100644
index fbf27b3..0000000
--- a/lib/pytube/pytube/parser.py
+++ /dev/null
@@ -1,185 +0,0 @@
-import ast
-import json
-import re
-from pytube.exceptions import HTMLParseError
-
-
-def parse_for_all_objects(html, preceding_regex):
- """Parses input html to find all matches for the input starting point.
-
- :param str html:
- HTML to be parsed for an object.
- :param str preceding_regex:
- Regex to find the string preceding the object.
- :rtype list:
- :returns:
- A list of dicts created from parsing the objects.
- """
- result = []
- regex = re.compile(preceding_regex)
- match_iter = regex.finditer(html)
- for match in match_iter:
- if match:
- start_index = match.end()
- try:
- obj = parse_for_object_from_startpoint(html, start_index)
- except HTMLParseError:
- # Some of the instances might fail because set is technically
- # a method of the ytcfg object. We'll skip these since they
- # don't seem relevant at the moment.
- continue
- else:
- result.append(obj)
-
- if len(result) == 0:
- raise HTMLParseError(f'No matches for regex {preceding_regex}')
-
- return result
-
-
-def parse_for_object(html, preceding_regex):
- """Parses input html to find the end of a JavaScript object.
-
- :param str html:
- HTML to be parsed for an object.
- :param str preceding_regex:
- Regex to find the string preceding the object.
- :rtype dict:
- :returns:
- A dict created from parsing the object.
- """
- regex = re.compile(preceding_regex)
- result = regex.search(html)
- if not result:
- raise HTMLParseError(f'No matches for regex {preceding_regex}')
-
- start_index = result.end()
- return parse_for_object_from_startpoint(html, start_index)
-
-
-def find_object_from_startpoint(html, start_point):
- """Parses input html to find the end of a JavaScript object.
-
- :param str html:
- HTML to be parsed for an object.
- :param int start_point:
- Index of where the object starts.
- :rtype dict:
- :returns:
- A dict created from parsing the object.
- """
- html = html[start_point:]
- if html[0] not in ['{','[']:
- raise HTMLParseError(f'Invalid start point. Start of HTML:\n{html[:20]}')
-
- # First letter MUST be a open brace, so we put that in the stack,
- # and skip the first character.
- last_char = '{'
- curr_char = None
- stack = [html[0]]
- i = 1
-
- context_closers = {
- '{': '}',
- '[': ']',
- '"': '"',
- '/': '/' # javascript regex
- }
-
- while i < len(html):
- if len(stack) == 0:
- break
- if curr_char not in [' ', '\n']:
- last_char = curr_char
- curr_char = html[i]
- curr_context = stack[-1]
-
- # If we've reached a context closer, we can remove an element off the stack
- if curr_char == context_closers[curr_context]:
- stack.pop()
- i += 1
- continue
-
- # Strings and regex expressions require special context handling because they can contain
- # context openers *and* closers
- if curr_context in ['"', '/']:
- # If there's a backslash in a string or regex expression, we skip a character
- if curr_char == '\\':
- i += 2
- continue
- else:
- # Non-string contexts are when we need to look for context openers.
- if curr_char in context_closers.keys():
- # Slash starts a regular expression depending on context
- if not (curr_char == '/' and last_char not in ['(', ',', '=', ':', '[', '!', '&', '|', '?', '{', '}', ';']):
- stack.append(curr_char)
-
- i += 1
-
- full_obj = html[:i]
- return full_obj # noqa: R504
-
-
-def parse_for_object_from_startpoint(html, start_point):
- """JSONifies an object parsed from HTML.
-
- :param str html:
- HTML to be parsed for an object.
- :param int start_point:
- Index of where the object starts.
- :rtype dict:
- :returns:
- A dict created from parsing the object.
- """
- full_obj = find_object_from_startpoint(html, start_point)
- try:
- return json.loads(full_obj)
- except json.decoder.JSONDecodeError:
- try:
- return ast.literal_eval(full_obj)
- except (ValueError, SyntaxError):
- raise HTMLParseError('Could not parse object.')
-
-
-def throttling_array_split(js_array):
- """Parses the throttling array into a python list of strings.
-
- Expects input to begin with `[` and close with `]`.
-
- :param str js_array:
- The javascript array, as a string.
- :rtype: list:
- :returns:
- A list of strings representing splits on `,` in the throttling array.
- """
- results = []
- curr_substring = js_array[1:]
-
- comma_regex = re.compile(r",")
- func_regex = re.compile(r"function\([^)]*\)")
-
- while len(curr_substring) > 0:
- if curr_substring.startswith('function'):
- # Handle functions separately. These can contain commas
- match = func_regex.search(curr_substring)
- match_start, match_end = match.span()
-
- function_text = find_object_from_startpoint(curr_substring, match.span()[1])
- full_function_def = curr_substring[:match_end + len(function_text)]
- results.append(full_function_def)
- curr_substring = curr_substring[len(full_function_def) + 1:]
- else:
- match = comma_regex.search(curr_substring)
-
- # Try-catch to capture end of array
- try:
- match_start, match_end = match.span()
- except AttributeError:
- match_start = len(curr_substring) - 1
- match_end = match_start + 1
-
- curr_el = curr_substring[:match_start]
- results.append(curr_el)
- curr_substring = curr_substring[match_end:]
-
- return results
diff --git a/lib/pytube/pytube/query.py b/lib/pytube/pytube/query.py
deleted file mode 100644
index 72d2391..0000000
--- a/lib/pytube/pytube/query.py
+++ /dev/null
@@ -1,424 +0,0 @@
-"""This module provides a query interface for media streams and captions."""
-from collections.abc import Mapping, Sequence
-from typing import Callable, List, Optional, Union
-
-from pytube import Caption, Stream
-from pytube.helpers import deprecated
-
-
-class StreamQuery(Sequence):
- """Interface for querying the available media streams."""
-
- def __init__(self, fmt_streams):
- """Construct a :class:`StreamQuery `.
-
- param list fmt_streams:
- list of :class:`Stream ` instances.
- """
- self.fmt_streams = fmt_streams
- self.itag_index = {int(s.itag): s for s in fmt_streams}
-
- def filter(
- self,
- fps=None,
- res=None,
- resolution=None,
- mime_type=None,
- type=None,
- subtype=None,
- file_extension=None,
- abr=None,
- bitrate=None,
- video_codec=None,
- audio_codec=None,
- only_audio=None,
- only_video=None,
- progressive=None,
- adaptive=None,
- is_dash=None,
- custom_filter_functions=None,
- ):
- """Apply the given filtering criterion.
-
- :param fps:
- (optional) The frames per second.
- :type fps:
- int or None
-
- :param resolution:
- (optional) Alias to ``res``.
- :type res:
- str or None
-
- :param res:
- (optional) The video resolution.
- :type resolution:
- str or None
-
- :param mime_type:
- (optional) Two-part identifier for file formats and format contents
- composed of a "type", a "subtype".
- :type mime_type:
- str or None
-
- :param type:
- (optional) Type part of the ``mime_type`` (e.g.: audio, video).
- :type type:
- str or None
-
- :param subtype:
- (optional) Sub-type part of the ``mime_type`` (e.g.: mp4, mov).
- :type subtype:
- str or None
-
- :param file_extension:
- (optional) Alias to ``sub_type``.
- :type file_extension:
- str or None
-
- :param abr:
- (optional) Average bitrate (ABR) refers to the average amount of
- data transferred per unit of time (e.g.: 64kbps, 192kbps).
- :type abr:
- str or None
-
- :param bitrate:
- (optional) Alias to ``abr``.
- :type bitrate:
- str or None
-
- :param video_codec:
- (optional) Video compression format.
- :type video_codec:
- str or None
-
- :param audio_codec:
- (optional) Audio compression format.
- :type audio_codec:
- str or None
-
- :param bool progressive:
- Excludes adaptive streams (one file contains both audio and video
- tracks).
-
- :param bool adaptive:
- Excludes progressive streams (audio and video are on separate
- tracks).
-
- :param bool is_dash:
- Include/exclude dash streams.
-
- :param bool only_audio:
- Excludes streams with video tracks.
-
- :param bool only_video:
- Excludes streams with audio tracks.
-
- :param custom_filter_functions:
- (optional) Interface for defining complex filters without
- subclassing.
- :type custom_filter_functions:
- list or None
-
- """
- filters = []
- if res or resolution:
- if isinstance(res, str) or isinstance(resolution, str):
- filters.append(lambda s: s.resolution == (res or resolution))
- elif isinstance(res, list) or isinstance(resolution, list):
- filters.append(lambda s: s.resolution in (res or resolution))
-
- if fps:
- filters.append(lambda s: s.fps == fps)
-
- if mime_type:
- filters.append(lambda s: s.mime_type == mime_type)
-
- if type:
- filters.append(lambda s: s.type == type)
-
- if subtype or file_extension:
- filters.append(lambda s: s.subtype == (subtype or file_extension))
-
- if abr or bitrate:
- filters.append(lambda s: s.abr == (abr or bitrate))
-
- if video_codec:
- filters.append(lambda s: s.video_codec == video_codec)
-
- if audio_codec:
- filters.append(lambda s: s.audio_codec == audio_codec)
-
- if only_audio:
- filters.append(
- lambda s: (
- s.includes_audio_track and not s.includes_video_track
- ),
- )
-
- if only_video:
- filters.append(
- lambda s: (
- s.includes_video_track and not s.includes_audio_track
- ),
- )
-
- if progressive:
- filters.append(lambda s: s.is_progressive)
-
- if adaptive:
- filters.append(lambda s: s.is_adaptive)
-
- if custom_filter_functions:
- filters.extend(custom_filter_functions)
-
- if is_dash is not None:
- filters.append(lambda s: s.is_dash == is_dash)
-
- return self._filter(filters)
-
- def _filter(self, filters: List[Callable]) -> "StreamQuery":
- fmt_streams = self.fmt_streams
- for filter_lambda in filters:
- fmt_streams = filter(filter_lambda, fmt_streams)
- return StreamQuery(list(fmt_streams))
-
- def order_by(self, attribute_name: str) -> "StreamQuery":
- """Apply a sort order. Filters out stream the do not have the attribute.
-
- :param str attribute_name:
- The name of the attribute to sort by.
- """
- has_attribute = [
- s
- for s in self.fmt_streams
- if getattr(s, attribute_name) is not None
- ]
- # Check that the attributes have string values.
- if has_attribute and isinstance(
- getattr(has_attribute[0], attribute_name), str
- ):
- # Try to return a StreamQuery sorted by the integer representations
- # of the values.
- try:
- return StreamQuery(
- sorted(
- has_attribute,
- key=lambda s: int(
- "".join(
- filter(str.isdigit, getattr(s, attribute_name))
- )
- ), # type: ignore # noqa: E501
- )
- )
- except ValueError:
- pass
-
- return StreamQuery(
- sorted(has_attribute, key=lambda s: getattr(s, attribute_name))
- )
-
- def desc(self) -> "StreamQuery":
- """Sort streams in descending order.
-
- :rtype: :class:`StreamQuery `
-
- """
- return StreamQuery(self.fmt_streams[::-1])
-
- def asc(self) -> "StreamQuery":
- """Sort streams in ascending order.
-
- :rtype: :class:`StreamQuery `
-
- """
- return self
-
- def get_by_itag(self, itag: int) -> Optional[Stream]:
- """Get the corresponding :class:`Stream ` for a given itag.
-
- :param int itag:
- YouTube format identifier code.
- :rtype: :class:`Stream ` or None
- :returns:
- The :class:`Stream ` matching the given itag or None if
- not found.
-
- """
- return self.itag_index.get(int(itag))
-
- def get_by_resolution(self, resolution: str) -> Optional[Stream]:
- """Get the corresponding :class:`Stream ` for a given resolution.
-
- Stream must be a progressive mp4.
-
- :param str resolution:
- Video resolution i.e. "720p", "480p", "360p", "240p", "144p"
- :rtype: :class:`Stream ` or None
- :returns:
- The :class:`Stream ` matching the given itag or None if
- not found.
-
- """
- return self.filter(
- progressive=True, subtype="mp4", resolution=resolution
- ).first()
-
- def get_lowest_resolution(self) -> Optional[Stream]:
- """Get lowest resolution stream that is a progressive mp4.
-
- :rtype: :class:`Stream ` or None
- :returns:
- The :class:`Stream ` matching the given itag or None if
- not found.
-
- """
- return (
- self.filter(progressive=True, subtype="mp4")
- .order_by("resolution")
- .first()
- )
-
- def get_highest_resolution(self) -> Optional[Stream]:
- """Get highest resolution stream that is a progressive video.
-
- :rtype: :class:`Stream ` or None
- :returns:
- The :class:`Stream ` matching the given itag or None if
- not found.
-
- """
- return self.filter(progressive=True).order_by("resolution").last()
-
- def get_audio_only(self, subtype: str = "mp4") -> Optional[Stream]:
- """Get highest bitrate audio stream for given codec (defaults to mp4)
-
- :param str subtype:
- Audio subtype, defaults to mp4
- :rtype: :class:`Stream ` or None
- :returns:
- The :class:`Stream ` matching the given itag or None if
- not found.
- """
- return (
- self.filter(only_audio=True, subtype=subtype)
- .order_by("abr")
- .last()
- )
-
- def otf(self, is_otf: bool = False) -> "StreamQuery":
- """Filter stream by OTF, useful if some streams have 404 URLs
-
- :param bool is_otf: Set to False to retrieve only non-OTF streams
- :rtype: :class:`StreamQuery `
- :returns: A StreamQuery object with otf filtered streams
- """
- return self._filter([lambda s: s.is_otf == is_otf])
-
- def first(self) -> Optional[Stream]:
- """Get the first :class:`Stream ` in the results.
-
- :rtype: :class:`Stream ` or None
- :returns:
- the first result of this query or None if the result doesn't
- contain any streams.
-
- """
- try:
- return self.fmt_streams[0]
- except IndexError:
- return None
-
- def last(self):
- """Get the last :class:`Stream ` in the results.
-
- :rtype: :class:`Stream ` or None
- :returns:
- Return the last result of this query or None if the result
- doesn't contain any streams.
-
- """
- try:
- return self.fmt_streams[-1]
- except IndexError:
- pass
-
- @deprecated("Get the size of this list directly using len()")
- def count(self, value: Optional[str] = None) -> int: # pragma: no cover
- """Get the count of items in the list.
-
- :rtype: int
- """
- if value:
- return self.fmt_streams.count(value)
-
- return len(self)
-
- @deprecated("This object can be treated as a list, all() is useless")
- def all(self) -> List[Stream]: # pragma: no cover
- """Get all the results represented by this query as a list.
-
- :rtype: list
-
- """
- return self.fmt_streams
-
- def __getitem__(self, i: Union[slice, int]):
- return self.fmt_streams[i]
-
- def __len__(self) -> int:
- return len(self.fmt_streams)
-
- def __repr__(self) -> str:
- return f"{self.fmt_streams}"
-
-
-class CaptionQuery(Mapping):
- """Interface for querying the available captions."""
-
- def __init__(self, captions: List[Caption]):
- """Construct a :class:`Caption `.
-
- param list captions:
- list of :class:`Caption ` instances.
-
- """
- self.lang_code_index = {c.code: c for c in captions}
-
- @deprecated(
- "This object can be treated as a dictionary, i.e. captions['en']"
- )
- def get_by_language_code(
- self, lang_code: str
- ) -> Optional[Caption]: # pragma: no cover
- """Get the :class:`Caption ` for a given ``lang_code``.
-
- :param str lang_code:
- The code that identifies the caption language.
- :rtype: :class:`Caption ` or None
- :returns:
- The :class:`Caption ` matching the given ``lang_code`` or
- None if it does not exist.
- """
- return self.lang_code_index.get(lang_code)
-
- @deprecated("This object can be treated as a dictionary")
- def all(self) -> List[Caption]: # pragma: no cover
- """Get all the results represented by this query as a list.
-
- :rtype: list
-
- """
- return list(self.lang_code_index.values())
-
- def __getitem__(self, i: str):
- return self.lang_code_index[i]
-
- def __len__(self) -> int:
- return len(self.lang_code_index)
-
- def __iter__(self):
- return iter(self.lang_code_index.values())
-
- def __repr__(self) -> str:
- return f"{self.lang_code_index}"
diff --git a/lib/pytube/pytube/request.py b/lib/pytube/pytube/request.py
deleted file mode 100644
index df1f41c..0000000
--- a/lib/pytube/pytube/request.py
+++ /dev/null
@@ -1,269 +0,0 @@
-"""Implements a simple wrapper around urlopen."""
-import http.client
-import json
-import logging
-import re
-import socket
-from functools import lru_cache
-from urllib import parse
-from urllib.error import URLError
-from urllib.request import Request, urlopen
-
-from pytube.exceptions import RegexMatchError, MaxRetriesExceeded
-from pytube.helpers import regex_search
-
-logger = logging.getLogger(__name__)
-default_range_size = 9437184 # 9MB
-
-
-def _execute_request(
- url,
- method=None,
- headers=None,
- data=None,
- timeout=socket._GLOBAL_DEFAULT_TIMEOUT
-):
- base_headers = {"User-Agent": "Mozilla/5.0", "accept-language": "en-US,en"}
- if headers:
- base_headers.update(headers)
- if data:
- # encode data for request
- if not isinstance(data, bytes):
- data = bytes(json.dumps(data), encoding="utf-8")
- if url.lower().startswith("http"):
- request = Request(url, headers=base_headers, method=method, data=data)
- else:
- raise ValueError("Invalid URL")
- return urlopen(request, timeout=timeout) # nosec
-
-
-def get(url, extra_headers=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
- """Send an http GET request.
-
- :param str url:
- The URL to perform the GET request for.
- :param dict extra_headers:
- Extra headers to add to the request
- :rtype: str
- :returns:
- UTF-8 encoded string of response
- """
- if extra_headers is None:
- extra_headers = {}
- response = _execute_request(url, headers=extra_headers, timeout=timeout)
- return response.read().decode("utf-8")
-
-
-def post(url, extra_headers=None, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
- """Send an http POST request.
-
- :param str url:
- The URL to perform the POST request for.
- :param dict extra_headers:
- Extra headers to add to the request
- :param dict data:
- The data to send on the POST request
- :rtype: str
- :returns:
- UTF-8 encoded string of response
- """
- # could technically be implemented in get,
- # but to avoid confusion implemented like this
- if extra_headers is None:
- extra_headers = {}
- if data is None:
- data = {}
- # required because the youtube servers are strict on content type
- # raises HTTPError [400]: Bad Request otherwise
- extra_headers.update({"Content-Type": "application/json"})
- response = _execute_request(
- url,
- headers=extra_headers,
- data=data,
- timeout=timeout
- )
- return response.read().decode("utf-8")
-
-
-def seq_stream(
- url,
- timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
- max_retries=0
-):
- """Read the response in sequence.
- :param str url: The URL to perform the GET request for.
- :rtype: Iterable[bytes]
- """
- # YouTube expects a request sequence number as part of the parameters.
- split_url = parse.urlsplit(url)
- base_url = '%s://%s/%s?' % (split_url.scheme, split_url.netloc, split_url.path)
-
- querys = dict(parse.parse_qsl(split_url.query))
-
- # The 0th sequential request provides the file headers, which tell us
- # information about how the file is segmented.
- querys['sq'] = 0
- url = base_url + parse.urlencode(querys)
-
- segment_data = b''
- for chunk in stream(url, timeout=timeout, max_retries=max_retries):
- yield chunk
- segment_data += chunk
-
- # We can then parse the header to find the number of segments
- stream_info = segment_data.split(b'\r\n')
- segment_count_pattern = re.compile(b'Segment-Count: (\\d+)')
- for line in stream_info:
- match = segment_count_pattern.search(line)
- if match:
- segment_count = int(match.group(1).decode('utf-8'))
-
- # We request these segments sequentially to build the file.
- seq_num = 1
- while seq_num <= segment_count:
- # Create sequential request URL
- querys['sq'] = seq_num
- url = base_url + parse.urlencode(querys)
-
- yield from stream(url, timeout=timeout, max_retries=max_retries)
- seq_num += 1
- return # pylint: disable=R1711
-
-
-def stream(
- url,
- timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
- max_retries=0
-):
- """Read the response in chunks.
- :param str url: The URL to perform the GET request for.
- :rtype: Iterable[bytes]
- """
- file_size: int = default_range_size # fake filesize to start
- downloaded = 0
- while downloaded < file_size:
- stop_pos = min(downloaded + default_range_size, file_size) - 1
- range_header = f"bytes={downloaded}-{stop_pos}"
- tries = 0
-
- # Attempt to make the request multiple times as necessary.
- while True:
- # If the max retries is exceeded, raise an exception
- if tries >= 1 + max_retries:
- raise MaxRetriesExceeded()
-
- # Try to execute the request, ignoring socket timeouts
- try:
- response = _execute_request(
- url + f"&range={downloaded}-{stop_pos}",
- method="GET",
- timeout=timeout
- )
- except URLError as e:
- # We only want to skip over timeout errors, and
- # raise any other URLError exceptions
- if isinstance(e.reason, socket.timeout):
- pass
- else:
- raise
- except http.client.IncompleteRead:
- # Allow retries on IncompleteRead errors for unreliable connections
- pass
- else:
- # On a successful request, break from loop
- break
- tries += 1
-
- if file_size == default_range_size:
- try:
- resp = _execute_request(
- url + f"&range={0}-{99999999999}",
- method="GET",
- timeout=timeout
- )
- content_range = resp.info()["Content-Length"]
- file_size = int(content_range)
- except (KeyError, IndexError, ValueError) as e:
- logger.error(e)
- while True:
- chunk = response.read()
- if not chunk:
- break
- downloaded += len(chunk)
- yield chunk
- return # pylint: disable=R1711
-
-
-@lru_cache()
-def filesize(url):
- """Fetch size in bytes of file at given URL
-
- :param str url: The URL to get the size of
- :returns: int: size in bytes of remote file
- """
- return int(head(url)["content-length"])
-
-
-@lru_cache()
-def seq_filesize(url):
- """Fetch size in bytes of file at given URL from sequential requests
-
- :param str url: The URL to get the size of
- :returns: int: size in bytes of remote file
- """
- total_filesize = 0
- # YouTube expects a request sequence number as part of the parameters.
- split_url = parse.urlsplit(url)
- base_url = '%s://%s/%s?' % (split_url.scheme, split_url.netloc, split_url.path)
- querys = dict(parse.parse_qsl(split_url.query))
-
- # The 0th sequential request provides the file headers, which tell us
- # information about how the file is segmented.
- querys['sq'] = 0
- url = base_url + parse.urlencode(querys)
- response = _execute_request(
- url, method="GET"
- )
-
- response_value = response.read()
- # The file header must be added to the total filesize
- total_filesize += len(response_value)
-
- # We can then parse the header to find the number of segments
- segment_count = 0
- stream_info = response_value.split(b'\r\n')
- segment_regex = b'Segment-Count: (\\d+)'
- for line in stream_info:
- # One of the lines should contain the segment count, but we don't know
- # which, so we need to iterate through the lines to find it
- try:
- segment_count = int(regex_search(segment_regex, line, 1))
- except RegexMatchError:
- pass
-
- if segment_count == 0:
- raise RegexMatchError('seq_filesize', segment_regex)
-
- # We make HEAD requests to the segments sequentially to find the total filesize.
- seq_num = 1
- while seq_num <= segment_count:
- # Create sequential request URL
- querys['sq'] = seq_num
- url = base_url + parse.urlencode(querys)
-
- total_filesize += int(head(url)['content-length'])
- seq_num += 1
- return total_filesize
-
-
-def head(url):
- """Fetch headers returned http GET request.
-
- :param str url:
- The URL to perform the GET request for.
- :rtype: dict
- :returns:
- dictionary of lowercase headers
- """
- response_headers = _execute_request(url, method="HEAD").info()
- return {k.lower(): v for k, v in response_headers.items()}
diff --git a/lib/pytube/pytube/streams.py b/lib/pytube/pytube/streams.py
deleted file mode 100644
index 179c1ae..0000000
--- a/lib/pytube/pytube/streams.py
+++ /dev/null
@@ -1,436 +0,0 @@
-"""
-This module contains a container for stream manifest data.
-
-A container object for the media stream (video only / audio only / video+audio
-combined). This was referred to as ``Video`` in the legacy pytube version, but
-has been renamed to accommodate DASH (which serves the audio and video
-separately).
-"""
-import logging
-import os
-from math import ceil
-
-from datetime import datetime
-from typing import BinaryIO, Dict, Optional, Tuple
-from urllib.error import HTTPError
-from urllib.parse import parse_qs
-
-from pytube import extract, request
-from pytube.helpers import safe_filename, target_directory
-from pytube.itags import get_format_profile
-from pytube.monostate import Monostate
-
-logger = logging.getLogger(__name__)
-
-
-class Stream:
- """Container for stream manifest data."""
-
- def __init__(
- self, stream: Dict, monostate: Monostate
- ):
- """Construct a :class:`Stream `.
-
- :param dict stream:
- The unscrambled data extracted from YouTube.
- :param dict monostate:
- Dictionary of data shared across all instances of
- :class:`Stream `.
- """
- # A dictionary shared between all instances of :class:`Stream `
- # (Borg pattern).
- self._monostate = monostate
-
- self.url = stream["url"] # signed download url
- self.itag = int(
- stream["itag"]
- ) # stream format id (youtube nomenclature)
-
- # set type and codec info
-
- # 'video/webm; codecs="vp8, vorbis"' -> 'video/webm', ['vp8', 'vorbis']
- self.mime_type, self.codecs = extract.mime_type_codec(stream["mimeType"])
-
- # 'video/webm' -> 'video', 'webm'
- self.type, self.subtype = self.mime_type.split("/")
-
- # ['vp8', 'vorbis'] -> video_codec: vp8, audio_codec: vorbis. DASH
- # streams return NoneType for audio/video depending.
- self.video_codec, self.audio_codec = self.parse_codecs()
-
- self.is_otf: bool = stream["is_otf"]
- self.bitrate: Optional[int] = stream["bitrate"]
-
- # filesize in bytes
- self._filesize: Optional[int] = int(stream.get('contentLength', 0))
-
- # filesize in kilobytes
- self._filesize_kb: Optional[float] = float(ceil(float(stream.get('contentLength', 0)) / 1024 * 1000) / 1000)
-
- # filesize in megabytes
- self._filesize_mb: Optional[float] = float(ceil(float(stream.get('contentLength', 0)) / 1024 / 1024 * 1000) / 1000)
-
- # filesize in gigabytes(fingers crossed we don't need terabytes going forward though)
- self._filesize_gb: Optional[float] = float(ceil(float(stream.get('contentLength', 0)) / 1024 / 1024 / 1024 * 1000) / 1000)
-
- # Additional information about the stream format, such as resolution,
- # frame rate, and whether the stream is live (HLS) or 3D.
- itag_profile = get_format_profile(self.itag)
- self.is_dash = itag_profile["is_dash"]
- self.abr = itag_profile["abr"] # average bitrate (audio streams only)
- if 'fps' in stream:
- self.fps = stream['fps'] # Video streams only
- self.resolution = itag_profile[
- "resolution"
- ] # resolution (e.g.: "480p")
- self.is_3d = itag_profile["is_3d"]
- self.is_hdr = itag_profile["is_hdr"]
- self.is_live = itag_profile["is_live"]
-
- @property
- def is_adaptive(self) -> bool:
- """Whether the stream is DASH.
-
- :rtype: bool
- """
- # if codecs has two elements (e.g.: ['vp8', 'vorbis']): 2 % 2 = 0
- # if codecs has one element (e.g.: ['vp8']) 1 % 2 = 1
- return bool(len(self.codecs) % 2)
-
- @property
- def is_progressive(self) -> bool:
- """Whether the stream is progressive.
-
- :rtype: bool
- """
- return not self.is_adaptive
-
- @property
- def includes_audio_track(self) -> bool:
- """Whether the stream only contains audio.
-
- :rtype: bool
- """
- return self.is_progressive or self.type == "audio"
-
- @property
- def includes_video_track(self) -> bool:
- """Whether the stream only contains video.
-
- :rtype: bool
- """
- return self.is_progressive or self.type == "video"
-
- def parse_codecs(self) -> Tuple[Optional[str], Optional[str]]:
- """Get the video/audio codecs from list of codecs.
-
- Parse a variable length sized list of codecs and returns a
- constant two element tuple, with the video codec as the first element
- and audio as the second. Returns None if one is not available
- (adaptive only).
-
- :rtype: tuple
- :returns:
- A two element tuple with audio and video codecs.
-
- """
- video = None
- audio = None
- if not self.is_adaptive:
- video, audio = self.codecs
- elif self.includes_video_track:
- video = self.codecs[0]
- elif self.includes_audio_track:
- audio = self.codecs[0]
- return video, audio
-
- @property
- def filesize(self) -> int:
- """File size of the media stream in bytes.
-
- :rtype: int
- :returns:
- Filesize (in bytes) of the stream.
- """
- if self._filesize == 0:
- try:
- self._filesize = request.filesize(self.url)
- except HTTPError as e:
- if e.code != 404:
- raise
- self._filesize = request.seq_filesize(self.url)
- return self._filesize
-
- @property
- def filesize_kb(self) -> float:
- """File size of the media stream in kilobytes.
-
- :rtype: float
- :returns:
- Rounded filesize (in kilobytes) of the stream.
- """
- if self._filesize_kb == 0:
- try:
- self._filesize_kb = float(ceil(request.filesize(self.url)/1024 * 1000) / 1000)
- except HTTPError as e:
- if e.code != 404:
- raise
- self._filesize_kb = float(ceil(request.seq_filesize(self.url)/1024 * 1000) / 1000)
- return self._filesize_kb
-
- @property
- def filesize_mb(self) -> float:
- """File size of the media stream in megabytes.
-
- :rtype: float
- :returns:
- Rounded filesize (in megabytes) of the stream.
- """
- if self._filesize_mb == 0:
- try:
- self._filesize_mb = float(ceil(request.filesize(self.url)/1024/1024 * 1000) / 1000)
- except HTTPError as e:
- if e.code != 404:
- raise
- self._filesize_mb = float(ceil(request.seq_filesize(self.url)/1024/1024 * 1000) / 1000)
- return self._filesize_mb
-
- @property
- def filesize_gb(self) -> float:
- """File size of the media stream in gigabytes.
-
- :rtype: float
- :returns:
- Rounded filesize (in gigabytes) of the stream.
- """
- if self._filesize_gb == 0:
- try:
- self._filesize_gb = float(ceil(request.filesize(self.url)/1024/1024/1024 * 1000) / 1000)
- except HTTPError as e:
- if e.code != 404:
- raise
- self._filesize_gb = float(ceil(request.seq_filesize(self.url)/1024/1024/1024 * 1000) / 1000)
- return self._filesize_gb
-
- @property
- def title(self) -> str:
- """Get title of video
-
- :rtype: str
- :returns:
- Youtube video title
- """
- return self._monostate.title or "Unknown YouTube Video Title"
-
- @property
- def filesize_approx(self) -> int:
- """Get approximate filesize of the video
-
- Falls back to HTTP call if there is not sufficient information to approximate
-
- :rtype: int
- :returns: size of video in bytes
- """
- if self._monostate.duration and self.bitrate:
- bits_in_byte = 8
- return int(
- (self._monostate.duration * self.bitrate) / bits_in_byte
- )
-
- return self.filesize
-
- @property
- def expiration(self) -> datetime:
- expire = parse_qs(self.url.split("?")[1])["expire"][0]
- return datetime.utcfromtimestamp(int(expire))
-
- @property
- def default_filename(self) -> str:
- """Generate filename based on the video title.
-
- :rtype: str
- :returns:
- An os file system compatible filename.
- """
- filename = safe_filename(self.title)
- return f"{filename}.{self.subtype}"
-
- def download(
- self,
- output_path: Optional[str] = None,
- filename: Optional[str] = None,
- filename_prefix: Optional[str] = None,
- skip_existing: bool = True,
- timeout: Optional[int] = None,
- max_retries: Optional[int] = 0
- ) -> str:
- """Write the media stream to disk.
-
- :param output_path:
- (optional) Output path for writing media file. If one is not
- specified, defaults to the current working directory.
- :type output_path: str or None
- :param filename:
- (optional) Output filename (stem only) for writing media file.
- If one is not specified, the default filename is used.
- :type filename: str or None
- :param filename_prefix:
- (optional) A string that will be prepended to the filename.
- For example a number in a playlist or the name of a series.
- If one is not specified, nothing will be prepended
- This is separate from filename so you can use the default
- filename but still add a prefix.
- :type filename_prefix: str or None
- :param skip_existing:
- (optional) Skip existing files, defaults to True
- :type skip_existing: bool
- :param timeout:
- (optional) Request timeout length in seconds. Uses system default.
- :type timeout: int
- :param max_retries:
- (optional) Number of retries to attempt after socket timeout. Defaults to 0.
- :type max_retries: int
- :returns:
- Path to the saved video
- :rtype: str
-
- """
- file_path = self.get_file_path(
- filename=filename,
- output_path=output_path,
- filename_prefix=filename_prefix,
- )
-
- if skip_existing and self.exists_at_path(file_path):
- logger.debug(f'file {file_path} already exists, skipping')
- self.on_complete(file_path)
- return file_path
-
- bytes_remaining = self.filesize
- logger.debug(f'downloading ({self.filesize} total bytes) file to {file_path}')
-
- with open(file_path, "wb") as fh:
- try:
- for chunk in request.stream(
- self.url,
- timeout=timeout,
- max_retries=max_retries
- ):
- # reduce the (bytes) remainder by the length of the chunk.
- bytes_remaining -= len(chunk)
- # send to the on_progress callback.
- self.on_progress(chunk, fh, bytes_remaining)
- except HTTPError as e:
- if e.code != 404:
- raise
- # Some adaptive streams need to be requested with sequence numbers
- for chunk in request.seq_stream(
- self.url,
- timeout=timeout,
- max_retries=max_retries
- ):
- # reduce the (bytes) remainder by the length of the chunk.
- bytes_remaining -= len(chunk)
- # send to the on_progress callback.
- self.on_progress(chunk, fh, bytes_remaining)
- self.on_complete(file_path)
- return file_path
-
- def get_file_path(
- self,
- filename: Optional[str] = None,
- output_path: Optional[str] = None,
- filename_prefix: Optional[str] = None,
- ) -> str:
- if not filename:
- filename = self.default_filename
- if filename_prefix:
- filename = f"{filename_prefix}{filename}"
- return os.path.join(target_directory(output_path), filename)
-
- def exists_at_path(self, file_path: str) -> bool:
- return (
- os.path.isfile(file_path)
- and os.path.getsize(file_path) == self.filesize
- )
-
- def stream_to_buffer(self, buffer: BinaryIO) -> None:
- """Write the media stream to buffer
-
- :rtype: io.BytesIO buffer
- """
- bytes_remaining = self.filesize
- logger.info(
- "downloading (%s total bytes) file to buffer", self.filesize,
- )
-
- for chunk in request.stream(self.url):
- # reduce the (bytes) remainder by the length of the chunk.
- bytes_remaining -= len(chunk)
- # send to the on_progress callback.
- self.on_progress(chunk, buffer, bytes_remaining)
- self.on_complete(None)
-
- def on_progress(
- self, chunk: bytes, file_handler: BinaryIO, bytes_remaining: int
- ):
- """On progress callback function.
-
- This function writes the binary data to the file, then checks if an
- additional callback is defined in the monostate. This is exposed to
- allow things like displaying a progress bar.
-
- :param bytes chunk:
- Segment of media file binary data, not yet written to disk.
- :param file_handler:
- The file handle where the media is being written to.
- :type file_handler:
- :py:class:`io.BufferedWriter`
- :param int bytes_remaining:
- The delta between the total file size in bytes and amount already
- downloaded.
-
- :rtype: None
-
- """
- file_handler.write(chunk)
- logger.debug("download remaining: %s", bytes_remaining)
- if self._monostate.on_progress:
- self._monostate.on_progress(self, chunk, bytes_remaining)
-
- def on_complete(self, file_path: Optional[str]):
- """On download complete handler function.
-
- :param file_path:
- The file handle where the media is being written to.
- :type file_path: str
-
- :rtype: None
-
- """
- logger.debug("download finished")
- on_complete = self._monostate.on_complete
- if on_complete:
- logger.debug("calling on_complete callback %s", on_complete)
- on_complete(self, file_path)
-
- def __repr__(self) -> str:
- """Printable object representation.
-
- :rtype: str
- :returns:
- A string representation of a :class:`Stream ` object.
- """
- parts = ['itag="{s.itag}"', 'mime_type="{s.mime_type}"']
- if self.includes_video_track:
- parts.extend(['res="{s.resolution}"', 'fps="{s.fps}fps"'])
- if not self.is_adaptive:
- parts.extend(
- ['vcodec="{s.video_codec}"', 'acodec="{s.audio_codec}"',]
- )
- else:
- parts.extend(['vcodec="{s.video_codec}"'])
- else:
- parts.extend(['abr="{s.abr}"', 'acodec="{s.audio_codec}"'])
- parts.extend(['progressive="{s.is_progressive}"', 'type="{s.type}"'])
- return f""
diff --git a/lib/pytube/pytube/version.py b/lib/pytube/pytube/version.py
deleted file mode 100644
index e2a3cbc..0000000
--- a/lib/pytube/pytube/version.py
+++ /dev/null
@@ -1,4 +0,0 @@
-__version__ = "15.0.0"
-
-if __name__ == "__main__":
- print(__version__)
diff --git a/pyproject.toml b/pyproject.toml
index 1c73e9a..c2833fb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,6 @@ python = "^3.11"
aiogram = "^3.1.1"
rich = "^13.6.0"
shazamio = { path = "lib/ShazamIO" }
-pytube = { path = "lib/pytube" }
sqlitedict = "^2.1.0"
spotipy = "^2.23.0"
attrs = "^23.1.0"
@@ -23,6 +22,7 @@ nest-asyncio = "^1.5.8"
icecream = "^2.1.3"
m3u8 = "^5.1.0"
cryptography = "^43.0.0"
+pytubefix = "^8.2.0"
[build-system]