File add-external-jsint.patch of Package yt-dlp
From cbf832cee291ab88ba32b345b9784c5ef028d521 Mon Sep 17 00:00:00 2001
From: coletdjnz <coletdjnz@protonmail.com>
Date: Sat, 23 Aug 2025 20:05:53 +1200
Subject: [PATCH 01/43] [youtube] POC JS Challenge Provider framework
---
yt_dlp/extractor/youtube/_video.py | 399 +++---------------
yt_dlp/extractor/youtube/js/README.md | 115 +++++
yt_dlp/extractor/youtube/js/__init__.py | 2 +
.../extractor/youtube/js/_builtin/__init__.py | 0
README.md | 21
test/test_jsc/conftest.py | 43
test/test_jsc/test_deno.py | 252 ++-
test/test_jsc/test_runtime.py | 85 +
yt_dlp/YoutubeDL.py | 44
yt_dlp/__init__.py | 17
yt_dlp/extractor/youtube/_video.py | 935 +++++--------
yt_dlp/extractor/youtube/js/README.md | 242 +--
yt_dlp/extractor/youtube/js/__init__.py | 7
yt_dlp/extractor/youtube/js/_builtin/jsinterp.py | 703 ++++-----
yt_dlp/extractor/youtube/js/_director.py | 336 ++--
yt_dlp/extractor/youtube/js/_registry.py | 11
yt_dlp/extractor/youtube/js/provider.py | 275 +--
yt_dlp/extractor/youtube/js/utils.py | 5
yt_dlp/extractor/youtube/jsc/README.md | 129 +
yt_dlp/extractor/youtube/jsc/__init__.py | 5
yt_dlp/extractor/youtube/jsc/_builtin/bun.py | 79 +
yt_dlp/extractor/youtube/jsc/_builtin/bundle/__init__.py | 27
yt_dlp/extractor/youtube/jsc/_builtin/bundle/bun.lib.js | 9
yt_dlp/extractor/youtube/jsc/_builtin/bundle/core.js | 501 ------
yt_dlp/extractor/youtube/jsc/_builtin/bundle/deno.lib.js | 9
yt_dlp/extractor/youtube/jsc/_builtin/bundle/jsc.js | 1005 +++++++-------
yt_dlp/extractor/youtube/jsc/_builtin/deno.py | 82 +
yt_dlp/extractor/youtube/jsc/_builtin/jsinterp.py | 288 ++++
yt_dlp/extractor/youtube/jsc/_builtin/node.py | 47
yt_dlp/extractor/youtube/jsc/_builtin/runtime.py | 283 +++
yt_dlp/extractor/youtube/jsc/_builtin/scripts/__init__.py | 12
yt_dlp/extractor/youtube/jsc/_builtin/scripts/bun.lib.js | 3
yt_dlp/extractor/youtube/jsc/_builtin/scripts/core.js | 501 ++++++
yt_dlp/extractor/youtube/jsc/_builtin/scripts/deno.lib.js | 3
yt_dlp/extractor/youtube/jsc/_director.py | 234 +++
yt_dlp/extractor/youtube/jsc/_registry.py | 4
yt_dlp/extractor/youtube/jsc/provider.py | 157 ++
yt_dlp/extractor/youtube/jsc/utils.py | 1
yt_dlp/extractor/youtube/pot/_director.py | 11
yt_dlp/extractor/youtube/pot/_provider.py | 6
yt_dlp/globals.py | 6
yt_dlp/options.py | 35
yt_dlp/utils/_jsruntime.py | 57
README.md | 21
test/test_jsc/conftest.py | 43
test/test_jsc/test_runtime.py | 85 +
yt_dlp/YoutubeDL.py | 44
yt_dlp/__init__.py | 18
yt_dlp/extractor/youtube/_video.py | 935 +++++---------
yt_dlp/extractor/youtube/jsc/README.md | 129 +
yt_dlp/extractor/youtube/jsc/__init__.py | 5
yt_dlp/extractor/youtube/jsc/_builtin/bun.py | 79 +
yt_dlp/extractor/youtube/jsc/_builtin/bundle/core.js | 504 +++++++
yt_dlp/extractor/youtube/jsc/_builtin/deno.py | 82 +
yt_dlp/extractor/youtube/jsc/_builtin/jsinterp.py | 288 ++++
yt_dlp/extractor/youtube/jsc/_builtin/node.py | 47
yt_dlp/extractor/youtube/jsc/_builtin/runtime.py | 283 ++++
yt_dlp/extractor/youtube/jsc/_builtin/scripts/__init__.py | 12
yt_dlp/extractor/youtube/jsc/_builtin/scripts/bun.lib.js | 3
yt_dlp/extractor/youtube/jsc/_builtin/scripts/core.js | 501 +++++++
yt_dlp/extractor/youtube/jsc/_builtin/scripts/deno.lib.js | 3
yt_dlp/extractor/youtube/jsc/_director.py | 234 +++
yt_dlp/extractor/youtube/jsc/_registry.py | 4
yt_dlp/extractor/youtube/jsc/provider.py | 157 ++
yt_dlp/extractor/youtube/jsc/utils.py | 1
yt_dlp/extractor/youtube/pot/_director.py | 11
yt_dlp/extractor/youtube/pot/_provider.py | 6
yt_dlp/globals.py | 6
yt_dlp/options.py | 35
yt_dlp/utils/_jsruntime.py | 57
README.md | 21
test/test_jsc/conftest.py | 43
test/test_jsc/test_runtime.py | 85 +
yt_dlp/YoutubeDL.py | 44
yt_dlp/__init__.py | 18
yt_dlp/extractor/youtube/_video.py | 935 +++++---------
yt_dlp/extractor/youtube/jsc/README.md | 129 +
yt_dlp/extractor/youtube/jsc/__init__.py | 5
yt_dlp/extractor/youtube/jsc/_builtin/bun.py | 79 +
yt_dlp/extractor/youtube/jsc/_builtin/bundle/core.js | 504 +++++++
yt_dlp/extractor/youtube/jsc/_builtin/deno.py | 82 +
yt_dlp/extractor/youtube/jsc/_builtin/jsinterp.py | 288 ++++
yt_dlp/extractor/youtube/jsc/_builtin/node.py | 47
yt_dlp/extractor/youtube/jsc/_builtin/runtime.py | 283 ++++
yt_dlp/extractor/youtube/jsc/_builtin/scripts/__init__.py | 12
yt_dlp/extractor/youtube/jsc/_builtin/scripts/bun.lib.js | 3
yt_dlp/extractor/youtube/jsc/_builtin/scripts/core.js | 501 +++++++
yt_dlp/extractor/youtube/jsc/_builtin/scripts/deno.lib.js | 3
yt_dlp/extractor/youtube/jsc/_director.py | 234 +++
yt_dlp/extractor/youtube/jsc/_registry.py | 4
yt_dlp/extractor/youtube/jsc/provider.py | 157 ++
yt_dlp/extractor/youtube/jsc/utils.py | 1
yt_dlp/extractor/youtube/pot/_director.py | 11
yt_dlp/extractor/youtube/pot/_provider.py | 6
yt_dlp/globals.py | 6
yt_dlp/options.py | 35
yt_dlp/utils/_jsruntime.py | 57
27 files changed, 3036 insertions(+), 557 deletions(-)
create mode 100644 yt_dlp/extractor/youtube/js/README.md
create mode 100644 yt_dlp/extractor/youtube/js/__init__.py
create mode 100644 yt_dlp/extractor/youtube/js/_builtin/__init__.py
create mode 100644 yt_dlp/extractor/youtube/js/_builtin/jsinterp.py
create mode 100644 yt_dlp/extractor/youtube/js/_director.py
create mode 100644 yt_dlp/extractor/youtube/js/_registry.py
create mode 100644 yt_dlp/extractor/youtube/js/provider.py
create mode 100644 yt_dlp/extractor/youtube/js/utils.py
Index: yt-dlp/README.md
===================================================================
--- yt-dlp.orig/README.md 2025-10-15 01:30:12.000000000 +0200
+++ yt-dlp/README.md 2025-10-19 19:15:20.539922302 +0200
@@ -362,6 +362,27 @@
--no-plugin-dirs Clear plugin directories to search,
including defaults and those provided by
previous --plugin-dirs
+ --js-runtimes RUNTIME[:PATH] Additional JavaScript runtime to enable,
+ with an optional path to the runtime
+ location. This option can be used multiple
+ times to enable multiple runtimes. Supported
+ runtimes: deno, node, bun. By default, only
+ "deno" runtime is enabled.
+ --no-js-runtimes Clear JavaScript runtimes to enable,
+ including defaults and those provided by
+ previous --js-runtimes
+ --download-ext-components COMPONENT
+ Specify external components that yt-dlp is
+ allowed to download when needed. You can use
+ this option multiple times to allow multiple
+ components. Supported values: npm
+ (JavaScript dependencies from npm), ejs-
+ github (official JS scripts from yt-dlp-ejs
+ GitHub). By default, no external components
+ are allowed.
+ --no-download-ext-components Disallow downloading of all external
+ components, including any previously allowed
+ by --download-ext-components or defaults.
--flat-playlist Do not extract a playlist's URL result
entries; some entry metadata may be missing
and downloading may be bypassed
Index: yt-dlp/test/test_jsc/conftest.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/test/test_jsc/conftest.py 2025-10-19 19:15:20.541033905 +0200
@@ -0,0 +1,43 @@
+import collections
+
+import pytest
+
+import yt_dlp.globals
+from yt_dlp import YoutubeDL
+from yt_dlp.extractor.common import InfoExtractor
+from yt_dlp.extractor.youtube.pot._provider import IEContentProviderLogger
+
+
+class MockLogger(IEContentProviderLogger):
+ log_level = IEContentProviderLogger.LogLevel.TRACE
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.messages = collections.defaultdict(list)
+
+ def trace(self, message: str):
+ self.messages['trace'].append(message)
+
+ def debug(self, message: str, *, once=False):
+ self.messages['debug'].append(message)
+
+ def info(self, message: str):
+ self.messages['info'].append(message)
+
+ def warning(self, message: str, *, once=False):
+ self.messages['warning'].append(message)
+
+ def error(self, message: str):
+ self.messages['error'].append(message)
+
+
+@pytest.fixture
+def ie() -> InfoExtractor:
+ runtime_names = yt_dlp.globals.supported_js_runtimes.value
+ ydl = YoutubeDL({'js_runtimes': {key: {} for key in runtime_names}})
+ return ydl.get_info_extractor('Youtube')
+
+
+@pytest.fixture
+def logger() -> MockLogger:
+ return MockLogger()
Index: yt-dlp/test/test_jsc/test_runtime.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/test/test_jsc/test_runtime.py 2025-10-19 19:15:20.541222548 +0200
@@ -0,0 +1,85 @@
+from __future__ import annotations
+
+import json
+
+import pytest
+try:
+ import yt_dlp_jsc
+except ImportError:
+ yt_dlp_jsc = None
+
+from yt_dlp.extractor.youtube.jsc.provider import (
+ JsChallengeRequest,
+ JsChallengeType,
+ JsChallengeProviderResponse,
+ JsChallengeResponse,
+ NChallengeInput,
+ NChallengeOutput,
+ SigChallengeInput,
+ SigChallengeOutput,
+)
+from yt_dlp.extractor.youtube.jsc._builtin.bun import BunJCP
+from yt_dlp.extractor.youtube.jsc._builtin.deno import DenoJCP
+from yt_dlp.extractor.youtube.jsc._builtin.node import NodeJCP
+
+
+pytestmark = pytest.mark.skipif(not yt_dlp_jsc, reason='yt-dlp-jsc not available')
+
+TESTS = [
+ JsChallengeRequest(JsChallengeType.N, NChallengeInput('https://www.youtube.com/s/player/3d3ba064/player_ias_tce.vflset/en_US/base.js', [
+ 'ZdZIqFPQK-Ty8wId',
+ '4GMrWHyKI5cEvhDO',
+ ])),
+ JsChallengeRequest(JsChallengeType.SIG, SigChallengeInput('https://www.youtube.com/s/player/3d3ba064/player_ias_tce.vflset/en_US/base.js', [
+ 'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt',
+ ])),
+ JsChallengeRequest(JsChallengeType.N, NChallengeInput('https://www.youtube.com/s/player/5ec65609/player_ias_tce.vflset/en_US/base.js', [
+ '0eRGgQWJGfT5rFHFj',
+ ])),
+ JsChallengeRequest(JsChallengeType.SIG, SigChallengeInput('https://www.youtube.com/s/player/5ec65609/player_ias_tce.vflset/en_US/base.js', [
+ 'AAJAJfQdSswRQIhAMG5SN7-cAFChdrE7tLA6grH0rTMICA1mmDc0HoXgW3CAiAQQ4=CspfaF_vt82XH5yewvqcuEkvzeTsbRuHssRMyJQ=I',
+ ])),
+ JsChallengeRequest(JsChallengeType.N, NChallengeInput('https://www.youtube.com/s/player/6742b2b9/player_ias_tce.vflset/en_US/base.js', [
+ '_HPB-7GFg1VTkn9u',
+ 'K1t_fcB6phzuq2SF',
+ ])),
+ JsChallengeRequest(JsChallengeType.SIG, SigChallengeInput('https://www.youtube.com/s/player/6742b2b9/player_ias_tce.vflset/en_US/base.js', [
+ 'MMGZJMUucirzS_SnrSPYsc85CJNnTUi6GgR5NKn-znQEICACojE8MHS6S7uYq4TGjQX_D4aPk99hNU6wbTvorvVVMgIARwsSdQfJAA',
+ ])),
+]
+
+RESPONSES = [
+ JsChallengeProviderResponse(test, JsChallengeResponse(test.type, (
+ NChallengeOutput if test.type is JsChallengeType.N else SigChallengeOutput
+ )(dict(zip(test.input.challenges, results)))))
+ for test, results in zip(TESTS, [
+ ['qmtUsIz04xxiNW', 'N9gmEX7YhKTSmw'],
+ ['ttJC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3gqEctUw-NYdNmOEvaepit0zJAtIEsgOV2SXZjhSHMNy0NXNG_1kNyBf6HPuAuCduh-a7O'],
+ ['4SvMpDQH-vBJCw'],
+ ['AJfQdSswRQIhAMG5SN7-cAFChdrE7tLA6grI0rTMICA1mmDc0HoXgW3CAiAQQ4HCspfaF_vt82XH5yewvqcuEkvzeTsbRuHssRMyJQ=='],
+ ['qUAsPryAO_ByYg', 'Y7PcOt3VE62mog'],
+ ['AJfQdSswRAIgMVVvrovTbw6UNh99kPa4D_XQjGT4qYu7S6SHM8EjoCACIEQnz-nKN5RgG6iUTnNJC58csYPSrnS_SzricuUMJZGM'],
+ ])
+]
+
+
+@pytest.fixture(params=[BunJCP, DenoJCP, NodeJCP])
+def jcp(request, ie, logger):
+ obj = request.param(ie, logger, settings={'debug': ['true']})
+ if not obj.is_available():
+ pytest.skip(f'{obj.PROVIDER_NAME} is not available')
+ return obj
+
+
+def test_bulk_requests(jcp):
+ assert list(jcp.bulk_solve(TESTS)) == RESPONSES
+
+
+def test_using_cached_player(jcp):
+ requests = TESTS[:3]
+ player = jcp._get_player(requests[0].video_id, requests[0].input.player_url)
+ initial = json.loads(jcp._run_js_runtime(jcp._construct_stdin(player, False, requests)))
+ preprocessed = initial.pop('preprocessed_player')
+ result = json.loads(jcp._run_js_runtime(jcp._construct_stdin(preprocessed, True, requests)))
+
+ assert initial == result
Index: yt-dlp/yt_dlp/YoutubeDL.py
===================================================================
--- yt-dlp.orig/yt_dlp/YoutubeDL.py 2025-10-19 19:15:07.637588892 +0200
+++ yt-dlp/yt_dlp/YoutubeDL.py 2025-10-19 19:15:20.541819906 +0200
@@ -42,6 +42,7 @@
plugin_pps,
all_plugins_loaded,
plugin_dirs,
+ supported_js_runtimes,
)
from .minicurses import format_text
from .networking import HEADRequest, Request, RequestDirector
@@ -533,6 +534,17 @@
See "EXTRACTOR ARGUMENTS" for details.
Argument values must always be a list of string(s).
E.g. {'youtube': {'skip': ['dash', 'hls']}}
+ js_runtimes: A dictionary of JavaScript runtime keys (in lower case) to enable
+ and a dictionary of additional configuration for the runtime.
+ If None, the default runtime of "deno" will be enabled.
+ The runtime configuration dictionary can have the following keys:
+ - path: Path to the executable (optional)
+ E.g. {'deno': {'path': '/path/to/deno'}}
+ download_ext_components: A list of external components that are allowed to be downloaded when required.
+ Supported components:
+ - `npm` (JS Dependencies from npm)
+ - `ejs-github` (Official JS Scripts from yt-dlp-ejs GitHub).
+ By default, no external components are allowed to be downloaded.
mark_watched: Mark videos watched (even with --simulate). Only for YouTube
The following options are deprecated and may be removed in the future:
@@ -717,6 +729,10 @@
else:
raise
+ # Note: this must be after plugins are loaded
+ self.params['js_runtimes'] = self.params.get('js_runtimes', {'deno': {}})
+ self._validate_js_runtimes(self.params['js_runtimes'])
+
self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
@@ -829,6 +845,26 @@
self.archive = preload_download_archive(self.params.get('download_archive'))
+    def _validate_js_runtimes(self, runtimes):
+        """Raise ValueError unless runtimes is a dict of {supported runtime name: config dict or None}"""
+        if not isinstance(runtimes, dict) or not all(
+            isinstance(k, str) and (v is None or isinstance(v, dict)) for k, v in runtimes.items()
+        ):
+            raise ValueError('Invalid js_runtimes format, expected a dict of {runtime: {config}}')
+
+        if unsupported_runtimes := sorted(runtimes.keys() - supported_js_runtimes.value.keys()):  # stable order
+            raise ValueError(
+                f'Unsupported JavaScript runtimes specified: {", ".join(unsupported_runtimes)}.'
+                f' Supported runtimes are: {", ".join(supported_js_runtimes.value.keys())}')
+
+    @functools.cached_property
+    def _js_runtimes(self):
+        """Runtime instances keyed by enabled runtime name; None if the name is not registered"""
+        registry = supported_js_runtimes.value
+        return {  # a config of None is accepted by _validate_js_runtimes, so treat it as {}
+            name: registry[name](path=(config or {}).get('path')) if registry.get(name) else None
+            for name, config in self.params.get('js_runtimes', {}).items()}
+
def warn_if_short_id(self, argv):
# short YouTube ID starting with dash?
idxs = [
@@ -4064,6 +4100,14 @@
join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
})) or 'none'))
+ if not self.params.get('js_runtimes'):
+ write_debug('JS runtimes: none (disabled)')
+ else:
+ write_debug('JS runtimes: %s' % (', '.join(sorted(
+ f'{name} (unknown)' if runtime is None else join_nonempty(runtime.info.name, runtime.info.version)
+ for name, runtime in self._js_runtimes.items() if runtime is None or runtime.info is not None
+ )) or 'none'))
+
write_debug(f'Proxy map: {self.proxies}')
write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
Index: yt-dlp/yt_dlp/__init__.py
===================================================================
--- yt-dlp.orig/yt_dlp/__init__.py 2025-10-19 19:15:07.827951758 +0200
+++ yt-dlp/yt_dlp/__init__.py 2025-10-19 19:15:20.542793851 +0200
@@ -59,10 +59,16 @@
render_table,
setproctitle,
shell_quote,
+ traverse_obj,
variadic,
write_string,
)
from .utils._utils import _UnsafeExtensionError
+from .utils._jsruntime import (
+ BunJsRuntime as _BunJsRuntime,
+ DenoJsRuntime as _DenoJsRuntime,
+ NodeJsRuntime as _NodeJsRuntime,
+)
from .YoutubeDL import YoutubeDL
@@ -773,6 +779,10 @@
else opts.audioformat if (opts.extractaudio and opts.audioformat in FFmpegExtractAudioPP.SUPPORTED_EXTS)
else None)
+ js_runtimes = {
+ runtime.lower(): {'path': path} for runtime, path in (
+ [*arg.split(':', 1), None][:2] for arg in opts.js_runtimes)}
+
return ParsedOptions(parser, opts, urls, {
'usenetrc': opts.usenetrc,
'netrc_location': opts.netrc_location,
@@ -944,6 +954,8 @@
'_warnings': warnings,
'_deprecation_warnings': deprecation_warnings,
'compat_opts': opts.compat_opts,
+ 'js_runtimes': js_runtimes,
+ 'download_ext_components': opts.download_ext_components,
})
@@ -1086,6 +1098,12 @@
from .extractor import gen_extractors, list_extractors
+# Register JS runtimes
+from .globals import supported_js_runtimes
+supported_js_runtimes.value['deno'] = _DenoJsRuntime
+supported_js_runtimes.value['node'] = _NodeJsRuntime
+supported_js_runtimes.value['bun'] = _BunJsRuntime
+
__all__ = [
'YoutubeDL',
'gen_extractors',
Index: yt-dlp/yt_dlp/extractor/youtube/_video.py
===================================================================
--- yt-dlp.orig/yt_dlp/extractor/youtube/_video.py 2025-10-19 19:15:07.673971836 +0200
+++ yt-dlp/yt_dlp/extractor/youtube/_video.py 2025-10-19 19:15:20.543922286 +0200
@@ -4,9 +4,7 @@
import datetime as dt
import functools
import itertools
-import json
import math
-import os.path
import random
import re
import sys
@@ -26,10 +24,10 @@
_split_innertube_client,
short_client_name,
)
+from .jsc._director import initialize_jsc_director
+from .jsc.provider import JsChallengeRequest, JsChallengeType, NChallengeInput, SigChallengeInput
from .pot._director import initialize_pot_director
from .pot.provider import PoTokenContext, PoTokenRequest
-from ..openload import PhantomJSwrapper
-from ...jsinterp import JSInterpreter, LocalNameSpace
from ...networking.exceptions import HTTPError
from ...utils import (
NO_DEFAULT,
@@ -39,13 +37,11 @@
clean_html,
datetime_from_str,
filesize_from_tbr,
- filter_dict,
float_or_none,
format_field,
get_first,
int_or_none,
join_nonempty,
- js_to_json,
mimetype2ext,
orderedSet,
parse_codecs,
@@ -1829,8 +1825,6 @@
'tablet': 'player-plasma-ias-tablet-en_US.vflset/base.js',
}
_INVERSE_PLAYER_JS_VARIANT_MAP = {v: k for k, v in _PLAYER_JS_VARIANT_MAP.items()}
- _NSIG_FUNC_CACHE_ID = 'nsig func'
- _DUMMY_STRING = 'dlp_wins'
@classmethod
def suitable(cls, url):
@@ -1850,6 +1844,7 @@
def _real_initialize(self):
super()._real_initialize()
self._pot_director = initialize_pot_director(self)
+ self._jsc_director = initialize_jsc_director(self)
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
lock = threading.Lock()
@@ -1867,7 +1862,7 @@
microformats = traverse_obj(
prs, (..., 'microformat', 'playerMicroformatRenderer'),
expected_type=dict)
- _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
+ _, live_status, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
is_live = live_status == 'is_live'
start_time = time.time()
@@ -2115,10 +2110,6 @@
variant = re.sub(r'[^a-zA-Z0-9]', '_', remove_end(player_path, '.js'))
return f'{player_id}-{variant}'
- def _signature_cache_id(self, example_sig):
- """ Return a string representation of a signature """
- return '.'.join(str(len(part)) for part in example_sig.split('.'))
-
@classmethod
def _extract_player_info(cls, player_url):
for player_re in cls._PLAYER_INFO_RE:
@@ -2140,53 +2131,17 @@
self._code_cache[player_js_key] = code
return self._code_cache.get(player_js_key)
- def _extract_signature_function(self, video_id, player_url, example_sig):
- # Read from filesystem cache
- func_id = join_nonempty(
- self._player_js_cache_key(player_url), self._signature_cache_id(example_sig))
- assert os.path.basename(func_id) == func_id
-
- self.write_debug(f'Extracting signature function {func_id}')
- cache_spec, code = self.cache.load('youtube-sigfuncs', func_id, min_ver='2025.07.21'), None
-
- if not cache_spec:
- code = self._load_player(video_id, player_url)
- if code:
- res = self._parse_sig_js(code, player_url)
- test_string = ''.join(map(chr, range(len(example_sig))))
- cache_spec = [ord(c) for c in res(test_string)]
- self.cache.store('youtube-sigfuncs', func_id, cache_spec)
-
- return lambda s: ''.join(s[i] for i in cache_spec)
-
- def _parse_sig_js(self, jscode, player_url):
- # Examples where `sig` is funcname:
- # sig=function(a){a=a.split(""); ... ;return a.join("")};
- # ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a};
- # {var l=f,m=h.sp,n=sig(decodeURIComponent(h.s));l.set(m,encodeURIComponent(n))}
- # sig=function(J){J=J.split(""); ... ;return J.join("")};
- # ;N&&(N=sig(decodeURIComponent(N)),J.set(R,encodeURIComponent(N)));return J};
- # {var H=u,k=f.sp,v=sig(decodeURIComponent(f.s));H.set(k,encodeURIComponent(v))}
- funcname = self._search_regex(
- (r'\b(?P<var>[a-zA-Z0-9_$]+)&&\((?P=var)=(?P<sig>[a-zA-Z0-9_$]{2,})\(decodeURIComponent\((?P=var)\)\)',
- r'(?P<sig>[a-zA-Z0-9_$]+)\s*=\s*function\(\s*(?P<arg>[a-zA-Z0-9_$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)',
- r'(?:\b|[^a-zA-Z0-9_$])(?P<sig>[a-zA-Z0-9_$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9_$]{2}\.[a-zA-Z0-9_$]{2}\(a,\d+\))?',
- # Old patterns
- r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
- # Obsolete patterns
- r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
- r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
- jscode, 'Initial JS player signature function name', group='sig')
-
- varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
- jsi = JSInterpreter(jscode)
- initial_function = jsi.extract_function(funcname, filter_dict({varname: global_list}))
- return lambda s: initial_function([s])
+ def _load_player_data_from_cache(self, name, player_url):
+ cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
+
+ if data := self._player_cache.get(cache_id):
+ return data
+
+ data = self.cache.load(*cache_id, min_ver='2025.07.21')
+ if data:
+ self._player_cache[cache_id] = data
+
+ return data
def _cached(self, func, *cache_id):
def inner(*args, **kwargs):
@@ -2204,17 +2159,23 @@
return ret
return inner
- def _load_player_data_from_cache(self, name, player_url):
- cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
-
- if data := self._player_cache.get(cache_id):
- return data
+ def _sig_spec_cache_id(self, player_url, spec_id):
+ return join_nonempty(self._player_js_cache_key(player_url), str(spec_id))
- data = self.cache.load(*cache_id, min_ver='2025.07.21')
- if data:
- self._player_cache[cache_id] = data
-
- return data
+ def _load_sig_spec_from_cache(self, spec_cache_id):
+ # This is almost identical to _load_player_data_from_cache
+ # I hate it
+ if spec_cache_id in self._player_cache:
+ return self._player_cache[spec_cache_id]
+ spec = self.cache.load('youtube-sigfuncs', spec_cache_id, min_ver='2025.07.21')
+ if spec:
+ self._player_cache[spec_cache_id] = spec
+ return spec
+
+ def _store_sig_spec_to_cache(self, spec_cache_id, spec):
+ if spec_cache_id not in self._player_cache:
+ self._player_cache[spec_cache_id] = spec
+ self.cache.store('youtube-sigfuncs', spec_cache_id, spec)
def _store_player_data_to_cache(self, name, player_url, data):
cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
@@ -2222,218 +2183,6 @@
self.cache.store(*cache_id, data)
self._player_cache[cache_id] = data
- def _decrypt_signature(self, s, video_id, player_url):
- """Turn the encrypted s field into a working signature"""
- extract_sig = self._cached(
- self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
- func = extract_sig(video_id, player_url, s)
- return func(s)
-
- def _decrypt_nsig(self, s, video_id, player_url):
- """Turn the encrypted n field into a working signature"""
- if player_url is None:
- raise ExtractorError('Cannot decrypt nsig without player_url')
- player_url = urljoin('https://www.youtube.com', player_url)
-
- try:
- jsi, _, func_code = self._extract_n_function_code(video_id, player_url)
- except ExtractorError as e:
- raise ExtractorError('Unable to extract nsig function code', cause=e)
-
- try:
- extract_nsig = self._cached(self._extract_n_function_from_code, self._NSIG_FUNC_CACHE_ID, player_url)
- ret = extract_nsig(jsi, func_code)(s)
- except JSInterpreter.Exception as e:
- try:
- jsi = PhantomJSwrapper(self, timeout=5000)
- except ExtractorError:
- raise e
- self.report_warning(
- f'Native nsig extraction failed: Trying with PhantomJS\n'
- f' n = {s} ; player = {player_url}', video_id)
- self.write_debug(e, only_once=True)
-
- args, func_body = func_code
- ret = jsi.execute(
- f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
- video_id=video_id, note='Executing signature code').strip()
-
- self.write_debug(f'Decrypted nsig {s} => {ret}')
- # Only cache nsig func JS code to disk if successful, and only once
- self._store_player_data_to_cache('nsig', player_url, func_code)
- return ret
-
- def _extract_n_function_name(self, jscode, player_url=None):
- varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
- if debug_str := traverse_obj(global_list, (lambda _, v: v.endswith('-_w8_'), any)):
- pattern = r'''(?x)
- \{\s*return\s+%s\[%d\]\s*\+\s*(?P<argname>[a-zA-Z0-9_$]+)\s*\}
- ''' % (re.escape(varname), global_list.index(debug_str))
- if match := re.search(pattern, jscode):
- pattern = r'''(?x)
- \{\s*\)%s\(\s*
- (?:
- (?P<funcname_a>[a-zA-Z0-9_$]+)\s*noitcnuf\s*
- |noitcnuf\s*=\s*(?P<funcname_b>[a-zA-Z0-9_$]+)(?:\s+rav)?
- )[;\n]
- ''' % re.escape(match.group('argname')[::-1])
- if match := re.search(pattern, jscode[match.start()::-1]):
- a, b = match.group('funcname_a', 'funcname_b')
- return (a or b)[::-1]
- self.write_debug(join_nonempty(
- 'Initial search was unable to find nsig function name',
- player_url and f' player = {player_url}', delim='\n'), only_once=True)
-
- # Examples (with placeholders nfunc, narray, idx):
- # * .get("n"))&&(b=nfunc(b)
- # * .get("n"))&&(b=narray[idx](b)
- # * b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
- # * a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
- # * a.D&&(PL(a),b=a.j.n||null)&&(b=narray[0](b),a.set("n",b),narray.length||nfunc("")
- # * a.D&&(b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
- # * J.J="";J.url="";J.Z&&(R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
- funcname, idx = self._search_regex(
- r'''(?x)
- (?:
- \.get\("n"\)\)&&\(b=|
- (?:
- b=String\.fromCharCode\(110\)|
- (?P<str_idx>[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\]
- )
- (?:
- ,[a-zA-Z0-9_$]+\(a\))?,c=a\.
- (?:
- get\(b\)|
- [a-zA-Z0-9_$]+\[b\]\|\|null
- )\)&&\(c=|
- \b(?P<var>[a-zA-Z0-9_$]+)=
- )(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\)
- (?(var),[a-zA-Z0-9_$]+\.set\((?:"n+"|[a-zA-Z0-9_$]+)\,(?P=var)\))''',
- jscode, 'n function name', group=('nfunc', 'idx'), default=(None, None))
- if not funcname:
- self.report_warning(join_nonempty(
- 'Falling back to generic n function search',
- player_url and f' player = {player_url}', delim='\n'), only_once=True)
- return self._search_regex(
- r'''(?xs)
- ;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\)
- \s*\{(?:(?!};).)+?return\s*(?P<q>["'])[\w-]+_w8_(?P=q)\s*\+\s*[a-zA-Z0-9_$]+''',
- jscode, 'Initial JS player n function name', group='name')
- elif not idx:
- return funcname
-
- return json.loads(js_to_json(self._search_regex(
- rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
- f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
-
- def _interpret_player_js_global_var(self, jscode, player_url):
- """Returns tuple of: variable name string, variable value list"""
- extract_global_var = self._cached(self._search_regex, 'js global array', player_url)
- varcode, varname, varvalue = extract_global_var(
- r'''(?x)
- (?P<q1>["\'])use\s+strict(?P=q1);\s*
- (?P<code>
- var\s+(?P<name>[a-zA-Z0-9_$]+)\s*=\s*
- (?P<value>
- (?P<q2>["\'])(?:(?!(?P=q2)).|\\.)+(?P=q2)
- \.split\((?P<q3>["\'])(?:(?!(?P=q3)).)+(?P=q3)\)
- |\[\s*(?:(?P<q4>["\'])(?:(?!(?P=q4)).|\\.)*(?P=q4)\s*,?\s*)+\]
- )
- )[;,]
- ''', jscode, 'global variable', group=('code', 'name', 'value'), default=(None, None, None))
- if not varcode:
- self.write_debug(join_nonempty(
- 'No global array variable found in player JS',
- player_url and f' player = {player_url}', delim='\n'), only_once=True)
- return None, None
-
- jsi = JSInterpreter(varcode)
- interpret_global_var = self._cached(jsi.interpret_expression, 'js global list', player_url)
- return varname, interpret_global_var(varvalue, LocalNameSpace(), allow_recursion=10)
-
- def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):
- # Fixup global array
- varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
- if varname and global_list:
- nsig_code = f'var {varname}={json.dumps(global_list)}; {nsig_code}'
- else:
- varname = self._DUMMY_STRING
- global_list = []
-
- # Fixup typeof check
- undefined_idx = global_list.index('undefined') if 'undefined' in global_list else r'\d+'
- fixed_code = re.sub(
- fr'''(?x)
- ;\s*if\s*\(\s*typeof\s+[a-zA-Z0-9_$]+\s*===?\s*(?:
- (["\'])undefined\1|
- {re.escape(varname)}\[{undefined_idx}\]
- )\s*\)\s*return\s+{re.escape(argnames[0])};
- ''', ';', nsig_code)
- if fixed_code == nsig_code:
- self.write_debug(join_nonempty(
- 'No typeof statement found in nsig function code',
- player_url and f' player = {player_url}', delim='\n'), only_once=True)
-
- # Fixup global funcs
- jsi = JSInterpreter(fixed_code)
- cache_id = (self._NSIG_FUNC_CACHE_ID, player_url)
- try:
- self._cached(
- self._extract_n_function_from_code, *cache_id)(jsi, (argnames, fixed_code))(self._DUMMY_STRING)
- except JSInterpreter.Exception:
- self._player_cache.pop(cache_id, None)
-
- global_funcnames = jsi._undefined_varnames
- debug_names = []
- jsi = JSInterpreter(jscode)
- for func_name in global_funcnames:
- try:
- func_args, func_code = jsi.extract_function_code(func_name)
- fixed_code = f'var {func_name} = function({", ".join(func_args)}) {{ {func_code} }}; {fixed_code}'
- debug_names.append(func_name)
- except Exception:
- self.report_warning(join_nonempty(
- f'Unable to extract global nsig function {func_name} from player JS',
- player_url and f' player = {player_url}', delim='\n'), only_once=True)
-
- if debug_names:
- self.write_debug(f'Extracted global nsig functions: {", ".join(debug_names)}')
-
- return argnames, fixed_code
-
- def _extract_n_function_code(self, video_id, player_url):
- player_id = self._extract_player_info(player_url)
- func_code = self._load_player_data_from_cache('nsig', player_url)
- jscode = func_code or self._load_player(video_id, player_url)
- jsi = JSInterpreter(jscode)
-
- if func_code:
- return jsi, player_id, func_code
-
- func_name = self._extract_n_function_name(jscode, player_url=player_url)
-
- # XXX: Work around (a) global array variable, (b) `typeof` short-circuit, (c) global functions
- func_code = self._fixup_n_function_code(*jsi.extract_function_code(func_name), jscode, player_url)
-
- return jsi, player_id, func_code
-
- def _extract_n_function_from_code(self, jsi, func_code):
- func = jsi.extract_function_from_code(*func_code)
-
- def extract_nsig(s):
- try:
- ret = func([s])
- except JSInterpreter.Exception:
- raise
- except Exception as e:
- raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
-
- if ret.startswith('enhanced_except_') or ret.endswith(s):
- raise JSInterpreter.Exception('Signature function returned an exception')
- return ret
-
- return extract_nsig
-
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
"""
Extract signatureTimestamp (sts)
@@ -3282,12 +3031,12 @@
sd[STREAMING_DATA_INNERTUBE_CONTEXT] = innertube_context
sd[STREAMING_DATA_FETCH_SUBS_PO_TOKEN] = fetch_subs_po_token_func
sd[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] = is_premium_subscriber
+ sd[STREAMING_DATA_FETCHED_TIMESTAMP] = fetched_timestamp
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
f[STREAMING_DATA_CLIENT_NAME] = client
f[STREAMING_DATA_FETCH_GVS_PO_TOKEN] = fetch_gvs_po_token_func
f[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] = is_premium_subscriber
f[STREAMING_DATA_PLAYER_TOKEN_PROVIDED] = bool(player_po_token)
- f[STREAMING_DATA_FETCHED_TIMESTAMP] = fetched_timestamp
if deprioritize_pr:
deprioritized_prs.append(pr)
else:
@@ -3367,12 +3116,13 @@
else:
self.report_warning(msg, only_once=True)
- def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
+ def _extract_formats_and_subtitles(self, video_id, player_responses, player_url, live_status, duration):
CHUNK_SIZE = 10 << 20
PREFERRED_LANG_VALUE = 10
original_language = None
itags, stream_ids = collections.defaultdict(set), []
itag_qualities, res_qualities = {}, {0: None}
+ subtitles = {}
q = qualities([
# Normally tiny is the smallest video-only formats. But
# audio-only formats with unknown quality may get tagged as tiny
@@ -3380,7 +3130,6 @@
'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres',
])
- streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
format_types = self._configuration_arg('formats')
all_formats = 'duplicate' in format_types
if self._configuration_arg('include_duplicate_formats'):
@@ -3388,6 +3137,9 @@
self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
'Use formats=duplicate extractor argument instead')
+ def solve_sig(s, spec):
+ return ''.join(s[i] for i in spec)
+
def build_fragments(f):
return LazyList({
'url': update_url_query(f['url'], {
@@ -3407,279 +3159,363 @@
# For handling potential pre-playback required waiting period
playback_wait = int_or_none(self._configuration_arg('playback_wait', [None])[0], default=6)
- for fmt in streaming_formats:
- client_name = fmt[STREAMING_DATA_CLIENT_NAME]
- available_at = fmt[STREAMING_DATA_FETCHED_TIMESTAMP] + playback_wait
- if fmt.get('targetDurationSec'):
+ for pr in player_responses:
+ streaming_data = traverse_obj(pr, 'streamingData')
+ if not streaming_data:
continue
- itag = str_or_none(fmt.get('itag'))
- audio_track = fmt.get('audioTrack') or {}
- stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
- if not all_formats:
- if stream_id in stream_ids:
- continue
-
- quality = fmt.get('quality')
- height = int_or_none(fmt.get('height'))
- if quality == 'tiny' or not quality:
- quality = fmt.get('audioQuality', '').lower() or quality
- # The 3gp format (17) in android client has a quality of "small",
- # but is actually worse than other formats
- if itag == '17':
- quality = 'tiny'
- if quality:
- if itag:
- itag_qualities[itag] = quality
- if height:
- res_qualities[height] = quality
-
- display_name = audio_track.get('displayName') or ''
- is_original = 'original' in display_name.lower()
- is_descriptive = 'descriptive' in display_name.lower()
- is_default = audio_track.get('audioIsDefault')
- language_code = audio_track.get('id', '').split('.')[0]
- if language_code and (is_original or (is_default and not original_language)):
- original_language = language_code
-
- has_drm = bool(fmt.get('drmFamilies'))
-
- # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
- # (adding `&sq=0` to the URL) and parsing emsg box to determine the
- # number of fragment that would subsequently requested with (`&sq=N`)
- if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF' and not has_drm:
- continue
-
- if has_drm:
- msg = f'Some {client_name} client https formats have been skipped as they are DRM protected. '
- if client_name == 'tv':
- msg += (
- f'{"Your account" if self.is_authenticated else "The current session"} may have '
- f'an experiment that applies DRM to all videos on the tv client. '
- f'See https://github.com/yt-dlp/yt-dlp/issues/12563 for more details.'
- )
- self.report_warning(msg, video_id, only_once=True)
-
- fmt_url = fmt.get('url')
- if not fmt_url:
- sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
- fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
- encrypted_sig = try_get(sc, lambda x: x['s'][0])
- if not all((sc, fmt_url, player_url, encrypted_sig)):
- msg = f'Some {client_name} client https formats have been skipped as they are missing a url. '
- if client_name in ('web', 'web_safari'):
- msg += 'YouTube is forcing SABR streaming for this client. '
- else:
+ fetch_po_token_func = streaming_data[STREAMING_DATA_FETCH_GVS_PO_TOKEN]
+ is_premium_subscriber = streaming_data[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER]
+ player_token_provided = streaming_data[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]
+ client_name = streaming_data.get(STREAMING_DATA_CLIENT_NAME)
+ available_at = streaming_data[STREAMING_DATA_FETCHED_TIMESTAMP] + playback_wait
+ streaming_formats = traverse_obj(streaming_data, (('formats', 'adaptiveFormats'), ...))
+
+ def get_stream_id(fmt_stream):
+ return str_or_none(fmt_stream.get('itag')), traverse_obj(fmt_stream, 'audioTrack', 'id'), fmt_stream.get('isDrc')
+
+ def process_format_stream(fmt_stream, proto, missing_pot):
+ nonlocal original_language
+ itag = str_or_none(fmt_stream.get('itag'))
+ audio_track = fmt_stream.get('audioTrack') or {}
+ quality = fmt_stream.get('quality')
+ height = int_or_none(fmt_stream.get('height'))
+ if quality == 'tiny' or not quality:
+ quality = fmt_stream.get('audioQuality', '').lower() or quality
+ # The 3gp format (17) in android client has a quality of "small",
+ # but is actually worse than other formats
+ if itag == '17':
+ quality = 'tiny'
+ if quality:
+ if itag:
+ itag_qualities[itag] = quality
+ if height:
+ res_qualities[height] = quality
+
+ display_name = audio_track.get('displayName') or ''
+ is_original = 'original' in display_name.lower()
+ is_descriptive = 'descriptive' in display_name.lower()
+ is_default = audio_track.get('audioIsDefault')
+ language_code = audio_track.get('id', '').split('.')[0]
+ if language_code and (is_original or (is_default and not original_language)):
+ original_language = language_code
+
+ has_drm = bool(fmt_stream.get('drmFamilies'))
+
+ if has_drm:
+ msg = f'Some {client_name} client {proto} formats have been skipped as they are DRM protected. '
+ if client_name == 'tv':
msg += (
- f'YouTube may have enabled the SABR-only or Server-Side Ad Placement experiment for '
- f'{"your account" if self.is_authenticated else "the current session"}. '
+ f'{"Your account" if self.is_authenticated else "The current session"} may have '
+ f'an experiment that applies DRM to all videos on the tv client. '
+ f'See https://github.com/yt-dlp/yt-dlp/issues/12563 for more details.'
)
- msg += 'See https://github.com/yt-dlp/yt-dlp/issues/12482 for more details'
self.report_warning(msg, video_id, only_once=True)
- continue
- try:
- fmt_url += '&{}={}'.format(
- traverse_obj(sc, ('sp', -1)) or 'signature',
- self._decrypt_signature(encrypted_sig, video_id, player_url),
- )
- except ExtractorError as e:
- self.report_warning(
- f'Signature extraction failed: Some formats may be missing\n'
- f' player = {player_url}\n'
- f' {bug_reports_message(before="")}',
- video_id=video_id, only_once=True)
- self.write_debug(
- f'{video_id}: Signature extraction failure info:\n'
- f' encrypted sig = {encrypted_sig}\n'
- f' player = {player_url}')
- self.write_debug(e, only_once=True)
- continue
- query = parse_qs(fmt_url)
- if query.get('n'):
- try:
- decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
- fmt_url = update_url_query(fmt_url, {
- 'n': decrypt_nsig(query['n'][0], video_id, player_url),
- })
- except ExtractorError as e:
- if player_url:
- self.report_warning(
- f'nsig extraction failed: Some formats may be missing\n'
- f' n = {query["n"][0]} ; player = {player_url}\n'
- f' {bug_reports_message(before="")}',
- video_id=video_id, only_once=True)
- self.write_debug(e, only_once=True)
- else:
- self.report_warning(
- 'Cannot decrypt nsig without player_url: Some formats may be missing',
- video_id=video_id, only_once=True)
- continue
+ tbr = float_or_none(fmt_stream.get('averageBitrate') or fmt_stream.get('bitrate'), 1000)
+ format_duration = traverse_obj(fmt_stream, ('approxDurationMs', {float_or_none(scale=1000)}))
+ # Some formats may have much smaller duration than others (possibly damaged during encoding)
+ # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
+ # Make sure to avoid false positives with small duration differences.
+ # E.g. __2ABJjxzNo, ySuUZEjARPY
+ is_damaged = try_call(lambda: format_duration < duration // 2)
+ if is_damaged:
+ self.report_warning(
+ f'Some {client_name} client {proto} formats are possibly damaged. They will be deprioritized', video_id, only_once=True)
- tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
- format_duration = traverse_obj(fmt, ('approxDurationMs', {float_or_none(scale=1000)}))
- # Some formats may have much smaller duration than others (possibly damaged during encoding)
- # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
- # Make sure to avoid false positives with small duration differences.
- # E.g. __2ABJjxzNo, ySuUZEjARPY
- is_damaged = try_call(lambda: format_duration < duration // 2)
- if is_damaged:
- self.report_warning(
- 'Some formats are possibly damaged. They will be deprioritized', video_id, only_once=True)
+ if missing_pot and 'missing_pot' not in self._configuration_arg('formats'):
+ self._report_pot_format_skipped(video_id, client_name, proto)
+ return None
+
+ name = fmt_stream.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
+ fps = int_or_none(fmt_stream.get('fps')) or 0
+ dct = {
+ 'asr': int_or_none(fmt_stream.get('audioSampleRate')),
+ 'filesize': int_or_none(fmt_stream.get('contentLength')),
+ 'format_id': f'{itag}{"-drc" if fmt_stream.get("isDrc") else ""}',
+ 'format_note': join_nonempty(
+ join_nonempty(display_name, is_default and ' (default)', delim=''),
+ name, fmt_stream.get('isDrc') and 'DRC',
+ try_get(fmt_stream, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
+ try_get(fmt_stream, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
+ is_damaged and 'DAMAGED', missing_pot and 'MISSING POT',
+ (self.get_param('verbose') or all_formats) and short_client_name(client_name),
+ delim=', '),
+ # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
+ 'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0),
+ 'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1
+ 'audio_channels': fmt_stream.get('audioChannels'),
+ 'height': height,
+ 'quality': q(quality) - bool(fmt_stream.get('isDrc')) / 2,
+ 'has_drm': has_drm,
+ 'tbr': tbr,
+ 'filesize_approx': filesize_from_tbr(tbr, format_duration),
+ 'width': int_or_none(fmt_stream.get('width')),
+ 'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
+ 'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1,
+ # Strictly de-prioritize damaged and 3gp formats
+ 'preference': -10 if is_damaged else -2 if itag == '17' else None,
+ }
+ mime_mobj = re.match(
+ r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt_stream.get('mimeType') or '')
+ if mime_mobj:
+ dct['ext'] = mimetype2ext(mime_mobj.group(1))
+ dct.update(parse_codecs(mime_mobj.group(2)))
+
+ single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
+ if single_stream and dct.get('ext'):
+ dct['container'] = dct['ext'] + '_dash'
+
+ return dct
+
+ def process_https_formats():
+ proto = 'https'
+ https_fmts = []
+ for fmt_stream in streaming_formats:
+ if fmt_stream.get('targetDurationSec'):
+ continue
- fetch_po_token_func = fmt[STREAMING_DATA_FETCH_GVS_PO_TOKEN]
- pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HTTPS]
+ # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
+ # (adding `&sq=0` to the URL) and parsing emsg box to determine the
+ # number of fragments that would subsequently be requested (with `&sq=N`)
+ if fmt_stream.get('type') == 'FORMAT_STREAM_TYPE_OTF' and not bool(fmt_stream.get('drmFamilies')):
+ continue
+ stream_id = get_stream_id(fmt_stream)
+ if not all_formats:
+ if stream_id in stream_ids:
+ continue
+
+ pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HTTPS]
+
+ require_po_token = (
+ stream_id[0] not in ['18']
+ and gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided))
+
+ po_token = (
+ gvs_pots.get(client_name)
+ or fetch_po_token_func(required=require_po_token or pot_policy.recommended))
+ if po_token:
+ if client_name not in gvs_pots:
+ gvs_pots[client_name] = po_token
+
+ fmt_url = fmt_stream.get('url')
+ encrypted_sig, sc = None, None
+ if not fmt_url:
+ sc = urllib.parse.parse_qs(fmt_stream.get('signatureCipher'))
+ fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
+ encrypted_sig = try_get(sc, lambda x: x['s'][0])
+ if not all((sc, fmt_url, player_url, encrypted_sig)):
+ msg = f'Some {client_name} client https formats have been skipped as they are missing a url. '
+ if client_name in ('web', 'web_safari'):
+ msg += 'YouTube is forcing SABR streaming for this client. '
+ else:
+ msg += (
+ f'YouTube may have enabled the SABR-only or Server-Side Ad Placement experiment for '
+ f'{"your account" if self.is_authenticated else "the current session"}. '
+ )
+ msg += 'See https://github.com/yt-dlp/yt-dlp/issues/12482 for more details'
+ self.report_warning(msg, video_id, only_once=True)
+ continue
- require_po_token = (
- itag not in ['18']
- and gvs_pot_required(
- pot_policy, fmt[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER],
- fmt[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]))
-
- po_token = (
- gvs_pots.get(client_name)
- or fetch_po_token_func(required=require_po_token or pot_policy.recommended))
-
- if po_token:
- fmt_url = update_url_query(fmt_url, {'pot': po_token})
- if client_name not in gvs_pots:
- gvs_pots[client_name] = po_token
+ fmt = process_format_stream(fmt_stream, proto, missing_pot=require_po_token and not po_token)
+ if not fmt:
+ continue
- if not po_token and require_po_token and 'missing_pot' not in self._configuration_arg('formats'):
- self._report_pot_format_skipped(video_id, client_name, 'https')
- continue
+ # signature
+ # Attempt to load sig spec from cache
+ if encrypted_sig:
+ spec_cache_id = self._sig_spec_cache_id(player_url, len(encrypted_sig))
+ spec = self._load_sig_spec_from_cache(spec_cache_id)
+ if spec:
+ self.write_debug(f'Using cached signature function {spec_cache_id}', only_once=True)
+ fmt_url += '&{}={}'.format(traverse_obj(sc, ('sp', -1)) or 'signature',
+ solve_sig(encrypted_sig, spec))
+ else:
+ fmt['_jsc_s_challenge'] = encrypted_sig
+ fmt['_jsc_s_sc'] = sc
+
+ # nsig
+ query = parse_qs(fmt_url)
+ if query.get('n'):
+ n_challenge = query['n'][0]
+
+ if n_challenge in self._player_cache:
+ fmt_url = update_url_query(fmt_url, {'n': self._player_cache[n_challenge]})
+ else:
+ fmt['_jsc_n_challenge'] = n_challenge
+
+ if po_token:
+ fmt_url = update_url_query(fmt_url, {'pot': po_token})
+
+ fmt['url'] = fmt_url
+
+ if stream_id[0]:
+ itags[stream_id[0]].add((proto, fmt.get('language')))
+ stream_ids.append(stream_id)
+
+ # For handling potential pre-playback required waiting period
+ if live_status not in ('is_live', 'post_live'):
+ fmt['available_at'] = available_at
+
+ if (all_formats or 'dashy' in format_types) and fmt['filesize']:
+ https_fmts.append({
+ **fmt,
+ 'format_id': f'{fmt["format_id"]}-dashy' if all_formats else fmt['format_id'],
+ 'protocol': 'http_dash_segments',
+ 'fragments': build_fragments(fmt),
+ })
+ if all_formats or 'dashy' not in format_types:
+ fmt['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
+ https_fmts.append(fmt)
+
+ # Bulk process sig/nsig handling
+ # Retrieve all JSC Sig and Nsig requests for this player response in one go
+ n_challenges = {}
+ s_challenges = {}
+ for fmt in https_fmts:
+ # This will de-duplicate requests
+ n_challenge = fmt.pop('_jsc_n_challenge', None)
+ if n_challenge is not None:
+ n_challenges.setdefault(n_challenge, []).append(fmt)
+
+ s_challenge = fmt.pop('_jsc_s_challenge', None)
+ if s_challenge is not None:
+ s_challenges.setdefault(len(s_challenge), {}).setdefault(s_challenge, []).append(fmt)
+
+ challenge_requests = []
+ if n_challenges:
+ challenge_requests.append(JsChallengeRequest(
+ type=JsChallengeType.N,
+ video_id=video_id,
+ input=NChallengeInput(challenges=list(n_challenges.keys()), player_url=player_url)))
+ if s_challenges:
+ challenge_requests.append(JsChallengeRequest(
+ type=JsChallengeType.SIG,
+ video_id=video_id,
+ input=SigChallengeInput(challenges=[''.join(map(chr, range(spec_id))) for spec_id in s_challenges], player_url=player_url)))
+
+ if challenge_requests:
+ for _challenge_request, challenge_response in self._jsc_director.bulk_solve(challenge_requests):
+ if challenge_response.type == JsChallengeType.SIG:
+ for challenge, result in challenge_response.output.results.items():
+ spec_id = len(challenge)
+ spec = [ord(c) for c in result]
+ self._store_sig_spec_to_cache(self._sig_spec_cache_id(player_url, spec_id), spec)
+ s_challenge_data = s_challenges.pop(spec_id, {})
+ if not s_challenge_data:
+ continue
+ for s_challenge, fmts in s_challenge_data.items():
+ solved_challenge = solve_sig(s_challenge, spec)
+ for fmt in fmts:
+ sc = fmt.pop('_jsc_s_sc')
+ fmt['url'] += '&{}={}'.format(
+ traverse_obj(sc, ('sp', -1)) or 'signature',
+ solved_challenge)
+
+ elif challenge_response.type == JsChallengeType.N:
+ for challenge, result in challenge_response.output.results.items():
+ fmts = n_challenges.pop(challenge, [])
+ for fmt in fmts:
+ self._player_cache[challenge] = result
+ fmt['url'] = update_url_query(fmt['url'], {'n': result})
+
+ # Warn about any challenge requests that remain unsolved;
+ # the message depends on the type of challenge request.
+ # TODO: this can happen simply because no supported JSC provider is available
+ # TODO: cleanup
- name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
- fps = int_or_none(fmt.get('fps')) or 0
- dct = {
- 'asr': int_or_none(fmt.get('audioSampleRate')),
- 'filesize': int_or_none(fmt.get('contentLength')),
- 'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
- 'format_note': join_nonempty(
- join_nonempty(display_name, is_default and ' (default)', delim=''),
- name, fmt.get('isDrc') and 'DRC',
- try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
- try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
- is_damaged and 'DAMAGED', require_po_token and not po_token and 'MISSING POT',
- (self.get_param('verbose') or all_formats) and short_client_name(client_name),
- delim=', '),
- # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
- 'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0),
- 'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1
- 'audio_channels': fmt.get('audioChannels'),
- 'height': height,
- 'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
- 'has_drm': has_drm,
- 'tbr': tbr,
- 'filesize_approx': filesize_from_tbr(tbr, format_duration),
- 'url': fmt_url,
- 'width': int_or_none(fmt.get('width')),
- 'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
- 'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1,
- # Strictly de-prioritize damaged and 3gp formats
- 'preference': -10 if is_damaged else -2 if itag == '17' else None,
- }
- mime_mobj = re.match(
- r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
- if mime_mobj:
- dct['ext'] = mimetype2ext(mime_mobj.group(1))
- dct.update(parse_codecs(mime_mobj.group(2)))
- if itag:
- itags[itag].add(('https', dct.get('language')))
- stream_ids.append(stream_id)
- single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
- if single_stream and dct.get('ext'):
- dct['container'] = dct['ext'] + '_dash'
-
- # For handling potential pre-playback required waiting period
- if live_status not in ('is_live', 'post_live'):
- dct['available_at'] = available_at
-
- if (all_formats or 'dashy' in format_types) and dct['filesize']:
- yield {
- **dct,
- 'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
- 'protocol': 'http_dash_segments',
- 'fragments': build_fragments(dct),
- }
- if all_formats or 'dashy' not in format_types:
- dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
- yield dct
-
- needs_live_processing = self._needs_live_processing(live_status, duration)
- skip_bad_formats = 'incomplete' not in format_types
-
- skip_manifests = set(self._configuration_arg('skip'))
- if (needs_live_processing == 'is_live' # These will be filtered out by YoutubeDL anyway
- or (needs_live_processing and skip_bad_formats)):
- skip_manifests.add('hls')
- if skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
- skip_manifests.add('dash')
-
- def process_manifest_format(f, proto, client_name, itag, missing_pot):
- key = (proto, f.get('language'))
- if not all_formats and key in itags[itag]:
- return False
-
- # For handling potential pre-playback required waiting period
- if live_status not in ('is_live', 'post_live'):
- f['available_at'] = available_at
-
- if f.get('source_preference') is None:
- f['source_preference'] = -1
-
- # Deprioritize since its pre-merged m3u8 formats may have lower quality audio streams
- if client_name == 'web_safari' and proto == 'hls' and live_status != 'is_live':
- f['source_preference'] -= 1
-
- if missing_pot:
- f['format_note'] = join_nonempty(f.get('format_note'), 'MISSING POT', delim=' ')
- f['source_preference'] -= 20
-
- itags[itag].add(key)
-
- if itag and all_formats:
- f['format_id'] = f'{itag}-{proto}'
- elif any(p != proto for p, _ in itags[itag]):
- f['format_id'] = f'{itag}-{proto}'
- elif itag:
- f['format_id'] = itag
-
- if original_language and f.get('language') == original_language:
- f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
- f['language_preference'] = PREFERRED_LANG_VALUE
-
- if itag in ('616', '235'):
- f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
- f['source_preference'] += 100
-
- f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
- if f['quality'] == -1 and f.get('height'):
- f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
- if self.get_param('verbose') or all_formats:
- f['format_note'] = join_nonempty(
- f.get('format_note'), short_client_name(client_name), delim=', ')
- if f.get('fps') and f['fps'] <= 1:
- del f['fps']
-
- if proto == 'hls' and f.get('has_drm'):
- f['has_drm'] = 'maybe'
- f['source_preference'] -= 5
- return True
+ if s_challenges:
+ self.report_warning(
+ 'Signature extraction failed: Some formats may be missing',
+ video_id=video_id, only_once=True)
+ if n_challenges:
+ self.report_warning(
+ 'nsig extraction failed: Some formats may be missing',
+ video_id=video_id, only_once=True)
- subtitles = {}
- for sd in streaming_data:
- client_name = sd[STREAMING_DATA_CLIENT_NAME]
- fetch_pot_func = sd[STREAMING_DATA_FETCH_GVS_PO_TOKEN]
- is_premium_subscriber = sd[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER]
- has_player_token = sd[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]
+ for cfmts in list(s_challenges.values()) + list(n_challenges.values()):
+ for fmt in cfmts:
+ if fmt in https_fmts:
+ https_fmts.remove(fmt)
+
+ yield from https_fmts
+
+ yield from process_https_formats()
+
+ needs_live_processing = self._needs_live_processing(live_status, duration)
+ skip_bad_formats = 'incomplete' not in format_types
+ if self._configuration_arg('include_incomplete_formats'):
+ skip_bad_formats = False
+ self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
+ 'Use formats=incomplete extractor argument instead')
+
+ skip_manifests = set(self._configuration_arg('skip'))
+ if (not self.get_param('youtube_include_hls_manifest', True)
+ or needs_live_processing == 'is_live' # These will be filtered out by YoutubeDL anyway
+ or (needs_live_processing and skip_bad_formats)):
+ skip_manifests.add('hls')
+
+ if not self.get_param('youtube_include_dash_manifest', True):
+ skip_manifests.add('dash')
+ if self._configuration_arg('include_live_dash'):
+ self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
+ 'Use formats=incomplete extractor argument instead')
+ elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
+ skip_manifests.add('dash')
+
+ def process_manifest_format(f, proto, client_name, itag, missing_pot):
+ key = (proto, f.get('language'))
+ if not all_formats and key in itags[itag]:
+ return False
+
+ # For handling potential pre-playback required waiting period
+ if live_status not in ('is_live', 'post_live'):
+ f['available_at'] = available_at
+
+ if f.get('source_preference') is None:
+ f['source_preference'] = -1
+
+ if missing_pot:
+ f['format_note'] = join_nonempty(f.get('format_note'), 'MISSING POT', delim=' ')
+ f['source_preference'] -= 20
+
+ itags[itag].add(key)
+
+ if itag and all_formats:
+ f['format_id'] = f'{itag}-{proto}'
+ elif any(p != proto for p, _ in itags[itag]):
+ f['format_id'] = f'{itag}-{proto}'
+ elif itag:
+ f['format_id'] = itag
+
+ if original_language and f.get('language') == original_language:
+ f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
+ f['language_preference'] = PREFERRED_LANG_VALUE
+
+ if itag in ('616', '235'):
+ f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
+ f['source_preference'] += 100
+
+ f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
+ if f['quality'] == -1 and f.get('height'):
+ f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
+ if self.get_param('verbose') or all_formats:
+ f['format_note'] = join_nonempty(
+ f.get('format_note'), short_client_name(client_name), delim=', ')
+ if f.get('fps') and f['fps'] <= 1:
+ del f['fps']
+
+ if proto == 'hls' and f.get('has_drm'):
+ f['has_drm'] = 'maybe'
+ f['source_preference'] -= 5
+ return True
- hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
+ hls_manifest_url = 'hls' not in skip_manifests and streaming_data.get('hlsManifestUrl')
if hls_manifest_url:
pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(
client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HLS]
- require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, has_player_token)
- po_token = gvs_pots.get(client_name, fetch_pot_func(required=require_po_token or pot_policy.recommended))
+ require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided)
+ po_token = gvs_pots.get(client_name, fetch_po_token_func(required=require_po_token or pot_policy.recommended))
if po_token:
hls_manifest_url = hls_manifest_url.rstrip('/') + f'/pot/{po_token}'
if client_name not in gvs_pots:
@@ -3699,12 +3535,12 @@
r'/itag/(\d+)', f['url'], 'itag', default=None), require_po_token and not po_token):
yield f
- dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
+ dash_manifest_url = 'dash' not in skip_manifests and streaming_data.get('dashManifestUrl')
if dash_manifest_url:
pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(
client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.DASH]
- require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, has_player_token)
- po_token = gvs_pots.get(client_name, fetch_pot_func(required=require_po_token or pot_policy.recommended))
+ require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided)
+ po_token = gvs_pots.get(client_name, fetch_po_token_func(required=require_po_token or pot_policy.recommended))
if po_token:
dash_manifest_url = dash_manifest_url.rstrip('/') + f'/pot/{po_token}'
if client_name not in gvs_pots:
@@ -3724,7 +3560,6 @@
r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
if needs_live_processing:
f['is_from_start'] = True
-
yield f
yield subtitles
@@ -3797,14 +3632,13 @@
else 'was_live' if live_content
else 'not_live' if False in (is_live, live_content)
else None)
- streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
- *formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, live_status, duration)
+ *formats, subtitles = self._extract_formats_and_subtitles(video_id, player_responses, player_url, live_status, duration)
if all(f.get('has_drm') for f in formats):
# If there are no formats that definitely don't have DRM, all have DRM
for f in formats:
f['has_drm'] = True
- return live_broadcast_details, live_status, streaming_data, formats, subtitles
+ return live_broadcast_details, live_status, formats, subtitles
def _download_initial_data(self, video_id, webpage, webpage_client, webpage_ytcfg):
initial_data = None
@@ -3964,8 +3798,9 @@
or int_or_none(get_first(microformats, 'lengthSeconds'))
or parse_duration(search_meta('duration')) or None)
- live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
+ live_broadcast_details, live_status, formats, automatic_captions = \
self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
+ streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
if live_status == 'post_live':
self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/README.md
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/README.md 2025-10-19 19:15:20.544750820 +0200
@@ -0,0 +1,129 @@
+# YoutubeIE JS Challenge Provider Framework
+
+As part of the YouTube extractor, we have a framework for solving JS Challenges programmatically (sig, nsig). This can be used by plugins.
+
+> [!TIP]
+> If publishing a JS Challenge Provider plugin to GitHub, add the [yt-dlp-jsc-provider](https://github.com/topics/yt-dlp-jsc-provider) topic to your repository to help users find it.
+
+
+## Public APIs
+
+- `yt_dlp.extractor.youtube.jsc.provider`
+- `yt_dlp.extractor.youtube.jsc.utils`
+
+Everything else is internal-only and no guarantees are made about the API stability.
+
+> [!WARNING]
+> We will try our best to maintain stability with the public APIs.
+> However, due to the nature of extractors and YouTube, we may need to remove or change APIs in the future.
+> If you are using these APIs outside yt-dlp plugins, please account for this by importing them safely.
+
+## JS Challenge Provider
+
+`yt_dlp.extractor.youtube.jsc.provider`
+
+```python
+from yt_dlp.extractor.youtube.jsc.provider import (
+ register_provider,
+ register_preference,
+ JsChallengeProvider,
+ JsChallengeRequest,
+ JsChallengeResponse,
+ JsChallengeProviderError,
+ JsChallengeProviderRejectedRequest,
+ JsChallengeType,
+ JsChallengeProviderResponse,
+ NChallengeOutput,
+)
+from yt_dlp.utils import traverse_obj, Popen
+import json
+import subprocess
+import typing
+
+@register_provider
+class MyJsChallengeProviderJSP(JsChallengeProvider): # Provider class name must end with "JSP"
+ PROVIDER_VERSION = '0.2.1'
+ # Define a unique display name for the provider
+ PROVIDER_NAME = 'my-provider'
+ BUG_REPORT_LOCATION = 'https://issues.example.com/report'
+
+ # Set supported challenge types.
+ # If None, the provider will handle all types.
+ _SUPPORTED_TYPES = [JsChallengeType.N]
+
+ def is_available(self) -> bool:
+ """
+ Check if the provider is available (e.g. all required dependencies are available)
+ This is used to determine if the provider should be used and to provide debug information.
+
+ IMPORTANT: This method SHOULD NOT make any network requests or perform any expensive operations.
+
+ Since this is called multiple times, we recommend caching the result.
+ """
+ return True
+
+ def close(self):
+ # Optional close hook, called when YoutubeDL is closed.
+ pass
+
+ def _real_bulk_solve(self, requests: list[JsChallengeRequest]) -> typing.Generator[JsChallengeProviderResponse, None, None]:
+ # ℹ️ If you need to do additional validation on the requests.
+ # Raise yt_dlp.extractor.youtube.jsc.provider.JsChallengeProviderRejectedRequest if the request is not supported.
+ if len("something") > 255:
+ raise JsChallengeProviderRejectedRequest('Challenges longer than 255 are not supported', expected=True)
+
+
+ # ℹ️ Settings are pulled from extractor args passed to yt-dlp with the key `youtubejs-<PROVIDER_KEY>`.
+ # For this example, the extractor arg would be:
+ # `--extractor-args "youtubejs-myjschallengeprovider:bin_path=/path/to/bin"`
+ bin_path = self._configuration_arg(
+ 'bin_path', default=['/path/to/bin'])[0]
+
+ # See below for logging guidelines
+ self.logger.trace(f'Using bin path: {bin_path}')
+
+ for request in requests:
+ # You can use the _get_player method to get the player JS code if needed.
+ # This shares the same caching as the YouTube extractor, so it will not make unnecessary requests.
+ player_js = self._get_player(request.video_id, request.input.player_url)
+ cmd = f'{bin_path} {request.input.challenges} {player_js}'
+ self.logger.info(f'Executing command: {cmd}')
+ stdout, _, ret = Popen.run(cmd, text=True, shell=True, stdout=subprocess.PIPE)
+ if ret != 0:
+ # ℹ️ If there is an error, raise JsChallengeProviderError.
+ # The request will be sent to the next provider if there is one.
+ # You can specify whether it is expected or not. If it is unexpected,
+ # the log will include a link to the bug report location (BUG_REPORT_LOCATION).
+
+ # raise JsChallengeProviderError(f'Command returned error code {ret}', expected=False)
+
+ # You can also only fail this specific request by returning a JsChallengeProviderResponse with the error.
+ # This will allow other requests to be processed by this provider.
+ yield JsChallengeProviderResponse(
+ request=request,
+ error=JsChallengeProviderError(f'Command returned error code {ret}', expected=False)
+ )
+
+ yield JsChallengeProviderResponse(
+ request=request,
+ response=JsChallengeResponse(
+ type=JsChallengeType.N,
+ output=NChallengeOutput(results=traverse_obj(json.loads(stdout))),
+ ))
+
+
+# If there are multiple JS Challenge Providers that can handle the same JsChallengeRequest(s),
+# you can define a preference function to increase/decrease the priority of providers.
+
+@register_preference(MyJsChallengeProviderJSP)
+def my_provider_preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int:
+ return 50
+```
+
+## Logging Guidelines
+
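+TODO: not written yet. In the meantime, log through `self.logger` (`trace`, `debug`, `info`, `warning`) as the example above and the built-in providers do.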
+
+## Debugging
+
+- Use `-v --extractor-args "youtube:jsc_trace=true"` to enable JS Challenge debug output.
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/__init__.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/__init__.py 2025-10-19 19:15:20.544934854 +0200
@@ -0,0 +1,5 @@
+# Trigger import of built-in providers
+from ._builtin.bun import BunJCP as _BunJCP # noqa: F401
+from ._builtin.deno import DenoJCP as _DenoJCP # noqa: F401
+from ._builtin.jsinterp import JsInterpJCP as _JsInterpJCP # noqa: F401
+from ._builtin.node import NodeJCP as _NodeJCP # noqa: F401
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/bun.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/bun.py 2025-10-19 19:15:20.545155764 +0200
@@ -0,0 +1,79 @@
+from __future__ import annotations
+
+import shlex
+import subprocess
+
+from yt_dlp.extractor.youtube.jsc._builtin.runtime import (
+ JsRuntimeChalBaseJCP,
+ Script,
+ ScriptSource,
+ ScriptType,
+ ScriptVariant,
+)
+from yt_dlp.extractor.youtube.jsc._builtin.scripts import load_script
+from yt_dlp.extractor.youtube.jsc.provider import (
+ JsChallengeProvider,
+ JsChallengeProviderError,
+ JsChallengeRequest,
+ register_preference,
+ register_provider,
+)
+from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
+from yt_dlp.extractor.youtube.pot.provider import provider_bug_report_message
+from yt_dlp.utils import Popen
+
+
+@register_provider
+class BunJCP(JsRuntimeChalBaseJCP, BuiltinIEContentProvider):  # JS challenge provider that runs its script through the bun executable
+ PROVIDER_NAME = 'bun'
+ JS_RUNTIME_NAME = 'bun'
+
+ _ARGS = ['--bun', 'run', '-']  # appended after the runtime path; NOTE(review): '-' presumably makes bun read the script from stdin - confirm
+ BUN_NPM_LIB_FILENAME = 'bun.lib.js'  # file name handed to load_script() in _bun_npm_source
+
+ def _iter_script_sources(self):  # re-yields the parent's (source, loader) pairs and emits a BUILTIN entry for _bun_npm_source when WEB is seen
+ for source, func in super()._iter_script_sources():
+ if source == ScriptSource.WEB:
+ yield ScriptSource.BUILTIN, self._bun_npm_source
+ yield source, func
+
+ def _bun_npm_source(self, script_type: ScriptType, /) -> Script | None:  # returns the BUN_NPM lib Script, or None when not applicable or unreadable
+ if script_type != ScriptType.LIB:
+ return None
+ if 'npm' not in self.ie.get_param('download_ext_components', []):  # opt-in: 'npm' must be listed in download_ext_components
+ self._report_ext_component_skipped('npm', 'NPM package')
+ return None
+
+ # Bun-specific lib script that uses Bun autoimport
+ # https://bun.com/docs/runtime/autoimport
+ error_hook = lambda e: self.logger.warning(
+ f'Failed to read bun challenge solver lib script: {e}{provider_bug_report_message(self)}')
+ code = load_script(
+ self.BUN_NPM_LIB_FILENAME, error_hook=error_hook)
+ if code:
+ return Script(script_type, ScriptVariant.BUN_NPM, ScriptSource.BUILTIN, self._SUPPORTED_VERSION, code)
+ return None
+
+ def _run_js_runtime(self, stdin: str, /) -> str:  # feeds `stdin` to bun and returns its stdout; raises JsChallengeProviderError on failure
+ cmd = [self.runtime_info.path, *self._ARGS]
+ self.logger.debug(f'Running bun: {shlex.join(cmd)}')
+ with Popen(
+ cmd,
+ text=True,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ ) as proc:
+ stdout, stderr = proc.communicate_or_kill(stdin)
+ if proc.returncode or stderr:  # a non-zero exit code OR any stderr output counts as failure
+ msg = 'Error running bun process'
+ if stderr:
+ msg = f'{msg}: {stderr}'
+ raise JsChallengeProviderError(msg)
+
+ return stdout
+
+
+@register_preference(BunJCP)
+def preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int:  # preference score registered for BunJCP; neither argument is inspected
+ return 800
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/bundle/core.js
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/bundle/core.js 2025-10-19 19:15:20.545500922 +0200
@@ -0,0 +1,504 @@
+--- yt_dlp/extractor/youtube/jsc/_builtin/bundle/core.js 2025-09-25 20:23:11.518734893 +0200
++++ /dev/null
+@@ -1,501 +0,0 @@
+-// This file is auto-generated from https://github.com/Grub4K/yt-dlp-jsc-deno
+-// Do not edit, changes will be overwritten.
+-// TODO: make this automatically updated
+-var jsc = (function (meriyah, astring) {
+- 'use strict';
+-
+- function matchesStructure(
+- obj,
+- structure,
+- ) {
+- if (Array.isArray(structure)) {
+- if (!Array.isArray(obj)) {
+- return false;
+- }
+- return (
+- structure.length === obj.length &&
+- structure.every((value, index) => matchesStructure(obj[index], value))
+- );
+- }
+- if (typeof structure === "object") {
+- if (!obj) {
+- return !structure;
+- }
+- if ("or" in structure) {
+- // Handle `{ or: [a, b] }`
+- return structure.or.some((node) => matchesStructure(obj, node));
+- }
+- for (const [key, value] of Object.entries(structure)) {
+- if (!matchesStructure(obj[key ], value)) {
+- return false;
+- }
+- }
+- return true;
+- }
+- return structure === obj;
+- }
+-
+- function isOneOf(value, ...of) {
+- return of.includes(value );
+- }
+-
+- function _optionalChain$2(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }
+-
+-
+- const logicalExpression = {
+- type: "ExpressionStatement",
+- expression: {
+- type: "LogicalExpression",
+- left: {
+- type: "Identifier",
+- },
+- right: {
+- type: "SequenceExpression",
+- expressions: [
+- {
+- type: "AssignmentExpression",
+- left: {
+- type: "Identifier",
+- },
+- operator: "=",
+- right: {
+- type: "CallExpression",
+- callee: {
+- type: "Identifier",
+- },
+- arguments: {
+- or: [
+- [
+- { type: "Literal" },
+- {
+- type: "CallExpression",
+- callee: {
+- type: "Identifier",
+- name: "decodeURIComponent",
+- },
+- arguments: [{ type: "Identifier" }],
+- optional: false,
+- },
+- ],
+- [
+- {
+- type: "CallExpression",
+- callee: {
+- type: "Identifier",
+- name: "decodeURIComponent",
+- },
+- arguments: [{ type: "Identifier" }],
+- optional: false,
+- },
+- ],
+- ],
+- },
+- optional: false,
+- },
+- },
+- {
+- type: "CallExpression",
+- },
+- ],
+- },
+- operator: "&&",
+- },
+- };
+-
+- const identifier$1 = {
+- or: [{
+- type: "ExpressionStatement",
+- expression: {
+- type: "AssignmentExpression",
+- operator: "=",
+- left: {
+- type: "Identifier",
+- },
+- right: {
+- type: "FunctionExpression",
+- params: [{}, {}, {}],
+- },
+- },
+- }, {
+- type: "FunctionDeclaration",
+- params: [{}, {}, {}],
+- }],
+- } ;
+-
+- function extract$1(
+- node,
+- ) {
+- if (
+- !matchesStructure(node, identifier$1 )
+- ) {
+- return null;
+- }
+- const block = (node.type === "ExpressionStatement" &&
+- node.expression.type === "AssignmentExpression" &&
+- node.expression.right.type === "FunctionExpression")
+- ? node.expression.right.body
+- : node.type === "FunctionDeclaration"
+- ? node.body
+- : null;
+- const relevantExpression = _optionalChain$2([block, 'optionalAccess', _ => _.body, 'access', _2 => _2.at, 'call', _3 => _3(-2)]);
+- if (!matchesStructure(relevantExpression, logicalExpression)) {
+- return null;
+- }
+- if (
+- _optionalChain$2([relevantExpression, 'optionalAccess', _4 => _4.type]) !== "ExpressionStatement" ||
+- relevantExpression.expression.type !==
+- "LogicalExpression" ||
+- relevantExpression.expression.right.type !==
+- "SequenceExpression" ||
+- relevantExpression.expression.right.expressions[0].type !==
+- "AssignmentExpression"
+- ) {
+- return null;
+- }
+- const call = relevantExpression.expression.right.expressions[0].right;
+- if (call.type !== "CallExpression" || call.callee.type !== "Identifier") {
+- return null;
+- }
+- // TODO: verify identifiers here
+- return {
+- type: "ArrowFunctionExpression",
+- params: [
+- {
+- type: "Identifier",
+- name: "sig",
+- },
+- ],
+- body: {
+- type: "CallExpression",
+- callee: {
+- type: "Identifier",
+- name: call.callee.name,
+- },
+- arguments: call.arguments.length === 1
+- ? [
+- {
+- type: "Identifier",
+- name: "sig",
+- },
+- ]
+- : [
+- call.arguments[0],
+- {
+- type: "Identifier",
+- name: "sig",
+- },
+- ],
+- optional: false,
+- },
+- async: false,
+- expression: false,
+- generator: false,
+- };
+- }
+-
+- function _optionalChain$1(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }
+-
+-
+- const identifier = {
+- type: "VariableDeclaration",
+- kind: "var",
+- declarations: [
+- {
+- type: "VariableDeclarator",
+- id: {
+- type: "Identifier",
+- },
+- init: {
+- type: "ArrayExpression",
+- elements: [
+- {
+- type: "Identifier",
+- },
+- ],
+- },
+- },
+- ],
+- };
+-
+- const catchBlockBody = [
+- {
+- type: "ReturnStatement",
+- argument: {
+- type: "BinaryExpression",
+- left: {
+- type: "MemberExpression",
+- object: {
+- type: "Identifier",
+- },
+- computed: true,
+- property: {
+- type: "Literal",
+- },
+- optional: false,
+- },
+- right: {
+- type: "Identifier",
+- },
+- operator: "+",
+- },
+- },
+- ] ;
+-
+- function extract(
+- node,
+- ) {
+- if (!matchesStructure(node, identifier)) {
+- // Fallback search for try { } catch { return X[12] + Y }
+- let name = null;
+- let block = null;
+- switch (node.type) {
+- case "ExpressionStatement": {
+- if (
+- node.expression.type === "AssignmentExpression" &&
+- node.expression.left.type === "Identifier" &&
+- node.expression.right.type === "FunctionExpression" &&
+- node.expression.right.params.length === 1
+- ) {
+- name = node.expression.left.name;
+- block = node.expression.right.body;
+- }
+- break;
+- }
+- case "FunctionDeclaration": {
+- if (node.params.length === 1) {
+- name = _optionalChain$1([node, 'access', _ => _.id, 'optionalAccess', _2 => _2.name]);
+- block = node.body;
+- }
+- break;
+- }
+- }
+- if (!block || !name) {
+- return null;
+- }
+- const tryNode = block.body.at(-2);
+- if (
+- _optionalChain$1([tryNode, 'optionalAccess', _3 => _3.type]) !== "TryStatement" ||
+- _optionalChain$1([tryNode, 'access', _4 => _4.handler, 'optionalAccess', _5 => _5.type]) !== "CatchClause"
+- ) {
+- return null;
+- }
+- const catchBody = tryNode.handler.body.body;
+- if (matchesStructure(catchBody, catchBlockBody)) {
+- return makeSolverFuncFromName(name);
+- }
+- return null;
+- }
+-
+- if (node.type !== "VariableDeclaration") {
+- return null;
+- }
+- const declaration = node.declarations[0];
+- if (
+- declaration.type !== "VariableDeclarator" || !declaration.init ||
+- declaration.init.type !== "ArrayExpression" ||
+- declaration.init.elements.length !== 1
+- ) {
+- return null;
+- }
+- const [firstElement] = declaration.init.elements;
+- if (!firstElement || firstElement.type !== "Identifier") {
+- return null;
+- }
+- return makeSolverFuncFromName(firstElement.name);
+- }
+-
+- function makeSolverFuncFromName(name) {
+- return {
+- type: "ArrowFunctionExpression",
+- params: [
+- {
+- type: "Identifier",
+- name: "nsig",
+- },
+- ],
+- body: {
+- type: "CallExpression",
+- callee: {
+- type: "Identifier",
+- name: name,
+- },
+- arguments: [
+- {
+- type: "Identifier",
+- name: "nsig",
+- },
+- ],
+- optional: false,
+- },
+- async: false,
+- expression: false,
+- generator: false,
+- };
+- }
+-
+- const setupNodes = meriyah.parse(`
+-globalThis.XMLHttpRequest = { prototype: {} };
+-const window = Object.assign(Object.create(null), globalThis);
+-window.location = new URL("https://www.youtube.com/watch?v=yt-dlp-wins");
+-const document = {};
+-let self = globalThis;
+-`).body;
+-
+- function _optionalChain(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }
+- function preprocessPlayer(data) {
+- const ast = meriyah.parse(data);
+- const body = ast.body;
+-
+- const block = (() => {
+- switch (body.length) {
+- case 1: {
+- const func = body[0];
+- if (
+- _optionalChain([func, 'optionalAccess', _ => _.type]) === "ExpressionStatement" &&
+- func.expression.type === "CallExpression" &&
+- func.expression.callee.type === "MemberExpression" &&
+- func.expression.callee.object.type === "FunctionExpression"
+- ) {
+- return func.expression.callee.object.body;
+- }
+- break;
+- }
+- case 2: {
+- const func = body[1];
+- if (
+- _optionalChain([func, 'optionalAccess', _2 => _2.type]) === "ExpressionStatement" &&
+- func.expression.type === "CallExpression" &&
+- func.expression.callee.type === "FunctionExpression"
+- ) {
+- const block = func.expression.callee.body;
+- // Skip `var window = this;`
+- block.body.splice(0, 1);
+- return block;
+- }
+- break;
+- }
+- }
+- throw "unexpected structure";
+- })();
+-
+- const found = {
+- nsig: [] ,
+- sig: [] ,
+- };
+- const plainExpressions = block.body.filter((node) => {
+- const nsig = extract(node);
+- if (nsig) {
+- found.nsig.push(nsig);
+- }
+- const sig = extract$1(node);
+- if (sig) {
+- found.sig.push(sig);
+- }
+- if (node.type === "ExpressionStatement") {
+- if (node.expression.type === "AssignmentExpression") {
+- return true;
+- }
+- return node.expression.type === "Literal";
+- }
+- return true;
+- });
+- block.body = plainExpressions;
+-
+- for (const [name, options] of Object.entries(found)) {
+- // TODO: this is cringe fix plz
+- const unique = new Set(options.map((x) => JSON.stringify(x)));
+- if (unique.size !== 1) {
+- const message = `found ${unique.size} ${name} function possibilities`;
+- throw (
+- message +
+- (unique.size ? `: ${options.map((x) => astring.generate(x)).join(", ")}` : "")
+- );
+- }
+- plainExpressions.push({
+- type: "ExpressionStatement",
+- expression: {
+- type: "AssignmentExpression",
+- operator: "=",
+- left: {
+- type: "MemberExpression",
+- computed: false,
+- object: {
+- type: "Identifier",
+- name: "_result",
+- },
+- property: {
+- type: "Identifier",
+- name: name,
+- },
+- },
+- right: options[0],
+- },
+- });
+- }
+-
+- ast.body.splice(0, 0, ...setupNodes);
+-
+- return astring.generate(ast);
+- }
+-
+- function getFromPrepared(code)
+-
+-
+- {
+- const resultObj = { nsig: null, sig: null };
+- Function("_result", code)(resultObj);
+- return resultObj;
+- }
+-
+- function main(input) {
+- const preprocessedPlayer = input.type === "player"
+- ? preprocessPlayer(input.player)
+- : input.preprocessed_player;
+- const solvers = getFromPrepared(preprocessedPlayer);
+-
+- const responses = input.requests.map(
+- (input) => {
+- if (!isOneOf(input.type, "nsig", "sig")) {
+- return {
+- type: "error",
+- error: `Unknown request type: ${input.type}`,
+- };
+- }
+- const solver = solvers[input.type];
+- if (!solver) {
+- return {
+- type: "error",
+- error: `Failed to extract ${input.type} function`,
+- };
+- }
+- try {
+- return {
+- type: "result",
+- data: Object.fromEntries(
+- input.challenges.map((challenge) => [challenge, solver(challenge)]),
+- ),
+- };
+- } catch (error) {
+- return {
+- type: "error",
+- error: error instanceof Error
+- ? `${error.message}\n${error.stack}`
+- : `${error}`,
+- };
+- }
+- },
+- );
+-
+- const output = {
+- type: "result",
+- responses,
+- };
+- if (input.type === "player" && input.output_preprocessed) {
+- output.preprocessed_player = preprocessedPlayer;
+- }
+- return output;
+- }
+-
+- return main;
+-
+-})(meriyah, astring);
\ No newline at end of file
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/deno.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/deno.py 2025-10-19 19:15:20.545747674 +0200
@@ -0,0 +1,82 @@
+from __future__ import annotations
+
+import contextlib
+import shlex
+import subprocess
+
+from yt_dlp.extractor.youtube.jsc._builtin.runtime import (
+ JsRuntimeChalBaseJCP,
+ Script,
+ ScriptSource,
+ ScriptType,
+ ScriptVariant,
+)
+from yt_dlp.extractor.youtube.jsc._builtin.scripts import load_script
+from yt_dlp.extractor.youtube.jsc.provider import (
+ JsChallengeProvider,
+ JsChallengeProviderError,
+ JsChallengeRequest,
+ register_preference,
+ register_provider,
+)
+from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
+from yt_dlp.extractor.youtube.pot.provider import provider_bug_report_message
+from yt_dlp.utils import Popen
+
+
+@register_provider
+class DenoJCP(JsRuntimeChalBaseJCP, BuiltinIEContentProvider):
+ PROVIDER_NAME = 'deno'
+ JS_RUNTIME_NAME = 'deno'
+
+ _DENO_OPTIONS = ['--no-prompt', '--no-remote']
+ DENO_NPM_LIB_FILENAME = 'deno.lib.js'
+
+ def _iter_script_sources(self):
+ for source, func in super()._iter_script_sources():
+ if source == ScriptSource.WEB:
+ yield ScriptSource.BUILTIN, self._deno_npm_source
+ yield source, func
+
+ def _deno_npm_source(self, script_type: ScriptType, /) -> Script | None:
+ if script_type != ScriptType.LIB:
+ return None
+ if 'npm' not in self.ie.get_param('download_ext_components', []):
+ self._report_ext_component_skipped('npm', 'NPM package')
+ return None
+ # Deno-specific lib scripts that uses Deno NPM imports
+ error_hook = lambda e: self.logger.warning(
+ f'Failed to read deno challenge solver lib script: {e}{provider_bug_report_message(self)}')
+ code = load_script(
+ self.DENO_NPM_LIB_FILENAME, error_hook=error_hook)
+ if code:
+ # TODO: any other permissions we want when not using --no-remote?
+ with contextlib.suppress(ValueError):
+ self._DENO_OPTIONS.remove('--no-remote')
+ return Script(script_type, ScriptVariant.DENO_NPM, ScriptSource.BUILTIN, self._SUPPORTED_VERSION, code)
+ return None
+
+ def _run_js_runtime(self, stdin: str, /) -> str:
+ cmd = [self.runtime_info.path, 'run', *self._DENO_OPTIONS, '-']
+ self.logger.debug(f'Running deno: {shlex.join(cmd)}')
+ with Popen(
+ cmd,
+ text=True,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ ) as proc:
+ stdout, stderr = proc.communicate_or_kill(stdin)
+ # TODO: fails when deno needs to download dependencies?
+ if proc.returncode or stderr:
+ msg = 'Error running deno process'
+ if stderr:
+ msg = f'{msg}: {stderr}'
+ raise JsChallengeProviderError(msg)
+
+ return stdout
+
+
+@register_preference(DenoJCP)
+def preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int:  # preference score registered for DenoJCP; neither argument is inspected
+ return 1000
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/jsinterp.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/jsinterp.py 2025-10-19 19:15:20.545878767 +0200
@@ -0,0 +1,288 @@
+import json
+import re
+import traceback
+from collections.abc import Generator
+
+from yt_dlp import join_nonempty, traverse_obj
+from yt_dlp.extractor.youtube.jsc.provider import (
+ JsChallengeProvider,
+ JsChallengeProviderError,
+ JsChallengeProviderResponse,
+ JsChallengeRequest,
+ JsChallengeResponse,
+ JsChallengeType,
+ NChallengeInput,
+ NChallengeOutput,
+ SigChallengeInput,
+ SigChallengeOutput,
+ register_provider,
+)
+from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
+from yt_dlp.jsinterp import JSInterpreter, LocalNameSpace
+from yt_dlp.utils import ExtractorError, filter_dict, js_to_json
+
+
+@register_provider
+class JsInterpJCP(JsChallengeProvider, BuiltinIEContentProvider):
+ PROVIDER_NAME = 'jsinterp'
+ _SUPPORTED_TYPES = [JsChallengeType.SIG, JsChallengeType.N]
+
+ _NSIG_FUNC_CACHE_ID = 'nsig func'
+ _DUMMY_STRING = 'dlp_wins'
+
+ def is_available(self) -> bool:  # unconditionally True: nothing is probed before reporting availability
+ return True
+
+ def _real_bulk_solve(self, requests: list[JsChallengeRequest]) -> Generator[JsChallengeProviderResponse, None, None]:  # yields exactly one response per request
+ for request in requests:
+ try:
+ if request.type == JsChallengeType.SIG:
+ output = self._solve_sig_challenges(request.video_id, request.input)
+ else:  # every non-SIG request is handled as an n challenge
+ output = self._solve_nsig_challenges(request.video_id, request.input)
+ yield JsChallengeProviderResponse(
+ request=request, response=JsChallengeResponse(type=request.type, output=output))
+ except Exception as e:  # per-request failure: reported as this request's error instead of aborting the whole batch
+ yield JsChallengeProviderResponse(request=request, error=e)
+
+ # region sig
+ def _solve_sig_challenges(self, video_id, sig_input: SigChallengeInput) -> SigChallengeOutput:
+ """Solve every challenge in sig_input.challenges; the output maps each challenge string to its solved value"""
+ results = {}
+ self.logger.trace(f'Solving {len(sig_input.challenges)} sig challenges using player {sig_input.player_url}')
+ for challenge in sig_input.challenges:
+ results[challenge] = self._solve_sig_challenge(challenge, video_id, sig_input.player_url)
+ return SigChallengeOutput(results=results)
+
+ def _solve_sig_challenge(self, challenge, video_id, player_url) -> str:  # solves one sig challenge with the function parsed from the player JS
+ code = self._get_player(video_id, player_url)
+ return self._parse_sig_js(code, player_url)(challenge)  # the signature function is re-parsed from `code` on every call
+
+ def _parse_sig_js(self, jscode, player_url):  # returns a callable taking one string and running the player's signature function on it
+ # Examples where `sig` is funcname:
+ # sig=function(a){a=a.split(""); ... ;return a.join("")};
+ # ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a};
+ # {var l=f,m=h.sp,n=sig(decodeURIComponent(h.s));l.set(m,encodeURIComponent(n))}
+ # sig=function(J){J=J.split(""); ... ;return J.join("")};
+ # ;N&&(N=sig(decodeURIComponent(N)),J.set(R,encodeURIComponent(N)));return J};
+ # {var H=u,k=f.sp,v=sig(decodeURIComponent(f.s));H.set(k,encodeURIComponent(v))}
+ funcname = self.ie._search_regex(
+ (r'\b(?P<var>[a-zA-Z0-9_$]+)&&\((?P=var)=(?P<sig>[a-zA-Z0-9_$]{2,})\(decodeURIComponent\((?P=var)\)\)',
+ r'(?P<sig>[a-zA-Z0-9_$]+)\s*=\s*function\(\s*(?P<arg>[a-zA-Z0-9_$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)',
+ r'(?:\b|[^a-zA-Z0-9_$])(?P<sig>[a-zA-Z0-9_$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9_$]{2}\.[a-zA-Z0-9_$]{2}\(a,\d+\))?',
+ # Old patterns
+ r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
+ # Obsolete patterns
+ r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
+ jscode, 'Initial JS player signature function name', group='sig')
+
+ varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
+ jsi = JSInterpreter(jscode)
+ initial_function = jsi.extract_function(funcname, filter_dict({varname: global_list}))  # NOTE(review): filter_dict presumably drops the entry when global_list is None - confirm
+ return lambda s: initial_function([s])  # the extracted function takes its arguments as a list
+ # endregion sig
+
+ # region nsig
+ def _solve_nsig_challenges(self, video_id, nsig_input: NChallengeInput) -> NChallengeOutput:
+ """Solve every challenge in nsig_input.challenges; the output maps each challenge string to its solved value"""
+ results = {}
+ self.logger.trace(f'Solving {len(nsig_input.challenges)} nsig challenges using player {nsig_input.player_url}')
+ for challenge in nsig_input.challenges:
+ results[challenge] = self._solve_nsig_challenge(challenge, video_id, nsig_input.player_url)
+ return NChallengeOutput(results=results)
+
+ def _solve_nsig_challenge(self, challenge, video_id, player_url) -> str:
+ """Solve a single n challenge; raises JsChallengeProviderError if the function cannot be extracted or run"""
+ try:
+ jsi, _, func_code = self._extract_n_function_code(video_id, player_url)
+ except ExtractorError as e:
+ raise JsChallengeProviderError(f'Unable to extract nsig function code: {e}') from e
+
+ try:
+ extract_nsig = self.ie._cached(self._extract_n_function_from_code, self._NSIG_FUNC_CACHE_ID, player_url)  # cache key: (cache id, player_url)
+ ret = extract_nsig(jsi, func_code)(challenge)
+ except JSInterpreter.Exception as e:
+ self.logger.debug(str(e), once=True)
+ raise JsChallengeProviderError(
+ 'Native nsig extraction failed', expected=False) from e
+
+ self.logger.debug(f'Transformed nsig {challenge} => {ret}')
+ # Only cache nsig func JS code to disk if successful, and only once
+ self.ie._store_player_data_to_cache('nsig', player_url, func_code)
+ return ret
+
+ def _extract_n_function_name(self, jscode, player_url=None):  # returns the n function's name found in jscode, trying three searches in turn
+ varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
+ if debug_str := traverse_obj(global_list, (lambda _, v: v.endswith('-_w8_'), any)):  # first global-list entry ending in '-_w8_', if any
+ pattern = r'''(?x)
+ \{\s*return\s+%s\[%d\]\s*\+\s*(?P<argname>[a-zA-Z0-9_$]+)\s*\}
+ ''' % (re.escape(varname), global_list.index(debug_str))
+ if match := re.search(pattern, jscode):
+ pattern = r'''(?x)
+ \{\s*\)%s\(\s*
+ (?:
+ (?P<funcname_a>[a-zA-Z0-9_$]+)\s*noitcnuf\s*
+ |noitcnuf\s*=\s*(?P<funcname_b>[a-zA-Z0-9_$]+)(?:\s+rav)?
+ )[;\n]
+ ''' % re.escape(match.group('argname')[::-1])
+ if match := re.search(pattern, jscode[match.start()::-1]):  # searches the reversed text before the match, hence the reversed keywords in the pattern
+ a, b = match.group('funcname_a', 'funcname_b')
+ return (a or b)[::-1]  # un-reverse the captured name
+ self.logger.debug(join_nonempty(
+ 'Initial search was unable to find nsig function name',
+ player_url and f' player = {player_url}', delim='\n'), once=True)
+
+ # Examples (with placeholders nfunc, narray, idx):
+ # * .get("n"))&&(b=nfunc(b)
+ # * .get("n"))&&(b=narray[idx](b)
+ # * b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
+ # * a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
+ # * a.D&&(PL(a),b=a.j.n||null)&&(b=narray[0](b),a.set("n",b),narray.length||nfunc("")
+ # * a.D&&(b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
+ # * J.J="";J.url="";J.Z&&(R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
+ funcname, idx = self.ie._search_regex(
+ r'''(?x)
+ (?:
+ \.get\("n"\)\)&&\(b=|
+ (?:
+ b=String\.fromCharCode\(110\)|
+ (?P<str_idx>[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\]
+ )
+ (?:
+ ,[a-zA-Z0-9_$]+\(a\))?,c=a\.
+ (?:
+ get\(b\)|
+ [a-zA-Z0-9_$]+\[b\]\|\|null
+ )\)&&\(c=|
+ \b(?P<var>[a-zA-Z0-9_$]+)=
+ )(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\)
+ (?(var),[a-zA-Z0-9_$]+\.set\((?:"n+"|[a-zA-Z0-9_$]+)\,(?P=var)\))''',
+ jscode, 'n function name', group=('nfunc', 'idx'), default=(None, None))
+ if not funcname:  # the pattern above found nothing: fall back to the generic search
+ self.logger.warning(join_nonempty(
+ 'Falling back to generic n function search',
+ player_url and f' player = {player_url}', delim='\n'), once=True)
+ return self.ie._search_regex(
+ r'''(?xs)
+ ;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\)
+ \s*\{(?:(?!};).)+?return\s*(?P<q>["'])[\w-]+_w8_(?P=q)\s*\+\s*[a-zA-Z0-9_$]+''',
+ jscode, 'Initial JS player n function name', group='name')
+ elif not idx:  # no [idx] suffix was captured: the match is already the function name
+ return funcname
+
+ return json.loads(js_to_json(self.ie._search_regex(  # funcname is an array: read its literal from the player and pick entry idx
+ rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
+ f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
+
+ def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):
+ """Patch the extracted n function code so that it can be interpreted on its own.
+
+ Three fixups are applied in order:
+ 1. the player's global array (if one is found) is declared in front of the code,
+ 2. the `typeof ... === "undefined"` early-return statement is removed,
+ 3. global functions that the code refers to are copied in from the player JS.
+
+ argnames and nsig_code are the pair returned by `extract_function_code`;
+ only argnames[0] is read here. jscode is the full player JS and
+ player_url is used for cache keys and log messages.
+
+ Returns a tuple of (argnames, fixed code).
+ """
+ # Fixup global array
+ varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
+ if varname and global_list:
+ nsig_code = f'var {varname}={json.dumps(global_list)}; {nsig_code}'
+ else:
+ # No global array was found: use placeholders so the pattern below can still be built
+ varname = self._DUMMY_STRING
+ global_list = []
+
+ # Fixup typeof check
+ # When 'undefined' is not an element of the global array, any numeric index is accepted
+ undefined_idx = global_list.index('undefined') if 'undefined' in global_list else r'\d+'
+ fixed_code = re.sub(
+ fr'''(?x)
+ ;\s*if\s*\(\s*typeof\s+[a-zA-Z0-9_$]+\s*===?\s*(?:
+ (["\'])undefined\1|
+ {re.escape(varname)}\[{undefined_idx}\]
+ )\s*\)\s*return\s+{re.escape(argnames[0])};
+ ''', ';', nsig_code)
+ if fixed_code == nsig_code:
+ self.logger.debug(join_nonempty(
+ 'No typeof statement found in nsig function code',
+ player_url and f' player = {player_url}', delim='\n'), once=True)
+
+ # Fixup global funcs
+ # Run the function once with a dummy input; the names collected in
+ # `jsi._undefined_varnames` afterwards are treated as missing global functions
+ jsi = JSInterpreter(fixed_code)
+ cache_id = (self._NSIG_FUNC_CACHE_ID, player_url)
+ try:
+ self.ie._cached(
+ self._extract_n_function_from_code, *cache_id)(jsi, (argnames, fixed_code))(self._DUMMY_STRING)
+ except JSInterpreter.Exception:
+ # The trial run failed: remove its entry from the player cache
+ self.ie._player_cache.pop(cache_id, None)
+
+ global_funcnames = jsi._undefined_varnames
+ debug_names = []
+ # From here on `jsi` interprets the full player JS rather than the n function code
+ jsi = JSInterpreter(jscode)
+ for func_name in global_funcnames:
+ try:
+ func_args, func_code = jsi.extract_function_code(func_name)
+ # Each extracted function is prepended as a `var` declaration
+ fixed_code = f'var {func_name} = function({", ".join(func_args)}) {{ {func_code} }}; {fixed_code}'
+ debug_names.append(func_name)
+ except Exception:
+ # Best effort: a name that cannot be extracted is reported and skipped
+ self.logger.warning(join_nonempty(
+ f'Unable to extract global nsig function {func_name} from player JS',
+ player_url and f' player = {player_url}', delim='\n'), once=True)
+
+ if debug_names:
+ self.logger.debug(f'Extracted global nsig functions: {", ".join(debug_names)}')
+
+ return argnames, fixed_code
+
+ def _extract_n_function_code(self, video_id, player_url):
+ """Return (interpreter, player id, n function code) for the given player.
+
+ Data stored under 'nsig' in the player cache is returned as is.
+ Otherwise the player JS is loaded, the n function is located by name,
+ and its code is passed through `_fixup_n_function_code`.
+ """
+ player_id = self.ie._extract_player_info(player_url)
+ func_code = self.ie._load_player_data_from_cache('nsig', player_url)
+ # NOTE(review): on a cache hit the interpreter below is constructed from the
+ # cached function code, not from the player JS - confirm this is intended
+ jscode = func_code or self.ie._load_player(video_id, player_url)
+ jsi = JSInterpreter(jscode)
+
+ if func_code:
+ return jsi, player_id, func_code
+
+ func_name = self._extract_n_function_name(jscode, player_url=player_url)
+
+ # XXX: Work around (a) global array variable, (b) `typeof` short-circuit, (c) global functions
+ func_code = self._fixup_n_function_code(*jsi.extract_function_code(func_name), jscode, player_url)
+
+ return jsi, player_id, func_code
+
+ def _extract_n_function_from_code(self, jsi, func_code):
+ """Build a callable from `func_code` that solves a single n challenge string.
+
+ func_code is unpacked into the arguments of `jsi.extract_function_from_code`.
+ The returned callable raises JSInterpreter.Exception on any failure.
+ """
+ func = jsi.extract_function_from_code(*func_code)
+
+ def extract_nsig(s):
+ try:
+ # The interpreted function takes its arguments as a list
+ ret = func([s])
+ except JSInterpreter.Exception:
+ raise
+ except Exception as e:
+ # Any other error is wrapped, with the formatted traceback as its message
+ raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
+
+ return ret
+
+ return extract_nsig
+ # endregion nsig
+
+ def _interpret_player_js_global_var(self, jscode, player_url):
+ """Returns tuple of: variable name string, variable value list"""
+ # The regex search is wrapped with `_cached` under the key ('jsc global array', player_url)
+ extract_global_var = self.ie._cached(self.ie._search_regex, 'jsc global array', player_url)
+ # The pattern looks for a `var NAME = ...` directly after the "use strict" directive, where the
+ # value is either a quoted string followed by `.split("...")` or an array literal of quoted strings
+ varcode, varname, varvalue = extract_global_var(
+ r'''(?x)
+ (?P<q1>["\'])use\s+strict(?P=q1);\s*
+ (?P<code>
+ var\s+(?P<name>[a-zA-Z0-9_$]+)\s*=\s*
+ (?P<value>
+ (?P<q2>["\'])(?:(?!(?P=q2)).|\\.)+(?P=q2)
+ \.split\((?P<q3>["\'])(?:(?!(?P=q3)).)+(?P=q3)\)
+ |\[\s*(?:(?P<q4>["\'])(?:(?!(?P=q4)).|\\.)*(?P=q4)\s*,?\s*)+\]
+ )
+ )[;,]
+ ''', jscode, 'global variable', group=('code', 'name', 'value'), default=(None, None, None))
+ if not varcode:
+ self.logger.debug(join_nonempty(
+ 'No global array variable found in player JS',
+ player_url and f' player = {player_url}', delim='\n'), once=True)
+ return None, None
+
+ # Evaluate the matched value expression; this call is wrapped with `_cached` as well
+ jsi = JSInterpreter(varcode)
+ interpret_global_var = self.ie._cached(jsi.interpret_expression, 'jsc global list', player_url)
+ return varname, interpret_global_var(varvalue, LocalNameSpace(), allow_recursion=10)
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/node.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/node.py 2025-10-19 19:15:20.546129360 +0200
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+import shlex
+import subprocess
+
+from yt_dlp.extractor.youtube.jsc._builtin.runtime import JsRuntimeChalBaseJCP
+from yt_dlp.extractor.youtube.jsc.provider import (
+ JsChallengeProvider,
+ JsChallengeProviderError,
+ JsChallengeRequest,
+ register_preference,
+ register_provider,
+)
+from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
+from yt_dlp.utils import Popen
+
+
+@register_provider
+class NodeJCP(JsRuntimeChalBaseJCP, BuiltinIEContentProvider):
+ """JS challenge provider that executes the solver script with the `node` runtime."""
+ PROVIDER_NAME = 'node'
+ JS_RUNTIME_NAME = 'node'
+
+ # Command line arguments placed after the node executable path.
+ # The program itself is written to the process's stdin (see _run_js_runtime).
+ _ARGS = ['--permission', '-']
+
+ def _run_js_runtime(self, stdin: str, /) -> str:
+ """Run node with `stdin` as its standard input and return what it wrote to stdout.
+
+ Raises JsChallengeProviderError if the process has a non-zero return code
+ or wrote anything to stderr.
+ """
+ cmd = [self.runtime_info.path, *self._ARGS]
+ self.logger.debug(f'Running node: {shlex.join(cmd)}')
+ with Popen(
+ cmd,
+ text=True,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ ) as proc:
+ stdout, stderr = proc.communicate_or_kill(stdin)
+ # Any stderr output counts as a failure, even when the return code is 0
+ if proc.returncode or stderr:
+ msg = 'Error running node process'
+ if stderr:
+ msg = f'{msg}: {stderr}'
+ raise JsChallengeProviderError(msg)
+
+ return stdout
+
+
+@register_preference(NodeJCP)
+def preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int:
+ """Return a constant preference value of 900 for NodeJCP; both arguments are ignored."""
+ return 900
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/runtime.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/runtime.py 2025-10-19 19:15:20.546291533 +0200
@@ -0,0 +1,283 @@
+from __future__ import annotations
+
+import collections
+import dataclasses
+import enum
+import functools
+import hashlib
+import importlib.resources
+import json
+import sys
+
+import yt_dlp
+from yt_dlp.extractor.youtube.jsc._builtin.scripts import load_script
+from yt_dlp.extractor.youtube.jsc.provider import (
+ JsChallengeProvider,
+ JsChallengeProviderError,
+ JsChallengeProviderRejectedRequest,
+ JsChallengeProviderResponse,
+ JsChallengeResponse,
+ JsChallengeType,
+ NChallengeOutput,
+ SigChallengeOutput,
+)
+from yt_dlp.extractor.youtube.pot.provider import provider_bug_report_message
+from yt_dlp.utils._jsruntime import JsRuntimeInfo
+
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+ from collections.abc import Generator
+
+ from yt_dlp.extractor.youtube.jsc.provider import JsChallengeRequest
+
+
+class ScriptType(enum.Enum):
+ """The two scripts that make up the solver program: the library and the core."""
+ # The lib script is emitted first and provides the `lib` object (see _construct_stdin)
+ LIB = 'lib'
+ # The core script is emitted after it and provides the `jsc` entry point
+ CORE = 'core'
+
+
+class ScriptVariant(enum.Enum):
+ """Build flavour of a script; used as the lookup key for its allowed hash."""
+ UNKNOWN = 'unknown'
+ MINIFIED = 'minified'
+ UNMINIFIED = 'unminified'
+ # NOTE(review): presumably the runtime-specific lib scripts that import the npm packages - confirm
+ DENO_NPM = 'deno_npm'
+ BUN_NPM = 'bun_npm'
+
+
+class ScriptSource(enum.Enum):
+ """Where a script was obtained from; the value is the text shown in log messages."""
+ PYPACKAGE = 'python package'
+ BINARY = 'binary'
+ CACHE = 'cache'
+ WEB = 'web'
+ BUILTIN = 'builtin'
+
+
+@dataclasses.dataclass
+class Script:
+ """A solver script together with the metadata describing where it came from."""
+ type: ScriptType
+ variant: ScriptVariant
+ source: ScriptSource
+ version: str
+ code: str
+
+ @functools.cached_property
+ def hash(self, /) -> str:
+ """Hex SHA3-512 digest of the encoded script code, computed once per instance."""
+ return hashlib.sha3_512(self.code.encode()).hexdigest()
+
+ def __str__(self, /):
+ # Only the first 7 characters of the hash are shown
+ return f'<Script {self.type.value!r} v{self.version} (source: {self.source.value}) variant={self.variant.value!r} size={len(self.code)} hash={self.hash[:7]}...>'
+
+
+class JsRuntimeChalBaseJCP(JsChallengeProvider):
+ """Base class for JS challenge providers that run the solver scripts in an external JS runtime.
+
+ Subclasses set JS_RUNTIME_NAME and implement `_run_js_runtime`.
+ """
+ # Key used to look up the runtime in the downloader's `_js_runtimes` (see runtime_info)
+ JS_RUNTIME_NAME: str
+ # Cache section for both preprocessed players and downloaded scripts
+ _CACHE_SECTION = 'challenge-solver'
+
+ _JCP_GUIDE_URL = 'https://github.com/yt-dlp/yt-dlp/wiki/YouTube-JS-Challenges'
+ # GitHub repository whose releases provide the scripts (see _web_release_source)
+ _REPOSITORY = 'yt-dlp/yt-dlp-jsc-deno'
+ _SUPPORTED_TYPES = [JsChallengeType.N, JsChallengeType.SIG]
+ # Script version expected by _get_script; also used as the release tag to download
+ _SUPPORTED_VERSION = '0.0.1'
+ # TODO: insert correct hashes here
+ # TODO: Integration tests for each kind of scripts source
+ # Expected `Script.hash` values, keyed by script type and then by variant.
+ # A type/variant pair that is missing here is not hash-checked by _get_script.
+ _ALLOWED_HASHES = {
+ ScriptType.LIB: {
+ ScriptVariant.MINIFIED: '488c1903d8beb24ee9788400b2a91e724751b04988ba4de398320de0e36b4a9e3a8db58849189bf1d48df3fc4b0972d96b4aabfd80fea25d7c43988b437062fd',
+ ScriptVariant.DENO_NPM: 'cbd33afbfa778e436aef774f3983f0b1234ad7f737ea9dbd9783ee26dce195f4b3242d1e202b2038e748044960bc2f976372e883c76157b24acdea939dba7603',
+ ScriptVariant.BUN_NPM: '2065c7584b39d4e3fe62f147ff0572c051629a00b1bdb3dbd21d61db172a42ad0fac210e923e080a58ca21d1cbf7c6a22a727a726654bae83af045e12958a5a0',
+ },
+ ScriptType.CORE: {
+ ScriptVariant.MINIFIED: 'df0c08c152911dedd35a98bbbb6a1786718c11e4233c52abda3d19fd11d97c3ba09745dfbca913ddeed72fead18819f62139220420c41a04d5a66ed629fbde4e',
+ ScriptVariant.UNMINIFIED: '8abfd4818573b6cf397cfae227661e3449fb5ac737a272ac0cf8268d94447b04b1c9a15f459b336175bf0605678a376e962df99b2c8d5498f16db801735f771c',
+ },
+ }
+
+ # File names of the unminified scripts (read by _builtin_source)
+ _SCRIPT_FILENAMES = {
+ ScriptType.LIB: 'lib.js',
+ ScriptType.CORE: 'core.js',
+ }
+
+ # File names of the minified scripts (read by _binary_source and _web_release_source)
+ _MIN_SCRIPT_FILENAMES = {
+ ScriptType.LIB: 'lib.min.js',
+ ScriptType.CORE: 'core.min.js',
+ }
+
+ def __init__(self, *args, **kwargs):
+ """Initialise the provider and read the undocumented `dev` setting."""
+ super().__init__(*args, **kwargs)
+ # Set to False by _get_script once no usable script could be found
+ self._available = True
+ # Note: developer use only, intentionally not documented.
+ # This bypasses verification of script hashes and versions.
+ # --extractor-args youtubejsc-{provider key}:dev=true
+ self.is_dev = self.settings.get('dev', []) == ['true']
+ if self.is_dev:
+ self.logger.warning(
+ f'You have enabled dev mode for {self.PROVIDER_KEY}JCP. '
+ f'This is a developer option intended for debugging. \n'
+ ' If you experience any issues while using this option, '
+ f'{self.ie._downloader._format_err("DO NOT", self.ie._downloader.Styles.ERROR)} open a bug report')
+
+ def _run_js_runtime(self, stdin: str, /) -> str:
+ """To be implemented by subclasses
+
+ Receives the program built by _construct_stdin and returns the runtime's stdout,
+ which _real_bulk_solve parses as JSON.
+ """
+ raise NotImplementedError
+
+ def _real_bulk_solve(self, /, requests: list[JsChallengeRequest]):
+ """Solve the requests, running the JS runtime once per distinct player URL.
+
+ Yields one JsChallengeProviderResponse per request/response pair.
+ Raises JsChallengeProviderError if the runtime output has type 'error'.
+ """
+ # Group the requests so that each player is only processed once
+ grouped: dict[str, list[JsChallengeRequest]] = collections.defaultdict(list)
+ for request in requests:
+ grouped[request.input.player_url].append(request)
+
+ for player_url, grouped_requests in grouped.items():
+ # Prefer the preprocessed player stored by an earlier run
+ player = self.ie.cache.load(self._CACHE_SECTION, f'player:{player_url}')
+ if player:
+ cached = True
+ else:
+ cached = False
+ # The video id is taken from the first request of the group
+ video_id = next((request.video_id for request in grouped_requests), None)
+ player = self._get_player(video_id, player_url)
+
+ stdin = self._construct_stdin(player, cached, grouped_requests)
+ stdout = self._run_js_runtime(stdin)
+ output = json.loads(stdout)
+ if output['type'] == 'error':
+ raise JsChallengeProviderError(output['error'])
+
+ # Store the preprocessed player, when the output contains one, for later runs
+ if preprocessed := output.get('preprocessed_player'):
+ self.ie.cache.store(self._CACHE_SECTION, f'player:{player_url}', preprocessed)
+
+ # Responses are matched to requests by position; zip() stops at the shorter sequence
+ for request, response_data in zip(grouped_requests, output['responses']):
+ if response_data['type'] == 'error':
+ yield JsChallengeProviderResponse(request, None, response_data['error'])
+ else:
+ yield JsChallengeProviderResponse(request, JsChallengeResponse(request.type, (
+ NChallengeOutput(response_data['data']) if request.type is JsChallengeType.N
+ else SigChallengeOutput(response_data['data']))))
+
+ def _construct_stdin(self, player: str, preprocessed: bool, requests: list[JsChallengeRequest], /) -> str:
+ """Build the JS program that is passed to the runtime.
+
+ The program consists of the lib script, the core script and a final statement
+ that prints the JSON-encoded result of calling `jsc` with the request data.
+ `preprocessed` selects whether `player` is sent as 'preprocessed_player' or as 'player'.
+ """
+ json_requests = [{
+ # TODO: i despise nsig name
+ # Requests whose type value is 'n' are sent with type 'nsig'
+ 'type': 'nsig' if request.type.value == 'n' else request.type.value,
+ 'challenges': request.input.challenges,
+ } for request in requests]
+ data = {
+ 'type': 'preprocessed',
+ 'preprocessed_player': player,
+ 'requests': json_requests,
+ } if preprocessed else {
+ 'type': 'player',
+ 'player': player,
+ 'requests': json_requests,
+ # Ask for the preprocessed player in the output so that it can be cached
+ 'output_preprocessed': True,
+ }
+ return f'''\
+ {self._lib_script.code}
+ const {{ astring, meriyah }} = lib;
+ {self._core_script.code}
+ console.log(JSON.stringify(jsc({json.dumps(data)})));
+ '''
+
+ # region: challenge solver script
+
+ @functools.cached_property
+ def _lib_script(self, /):
+ """The lib script returned by _get_script, resolved on first access and then cached."""
+ return self._get_script(ScriptType.LIB)
+
+ @functools.cached_property
+ def _core_script(self, /):
+ """The core script returned by _get_script, resolved on first access and then cached."""
+ return self._get_script(ScriptType.CORE)
+
+ def _get_script(self, script_type: ScriptType, /) -> Script:
+ """Return a script of the given type, trying each source of _iter_script_sources in order.
+
+ Sets `self._available` to False and raises JsChallengeProviderRejectedRequest
+ when the loop finishes without returning a script.
+ The version and hash checks are bypassed when `self.is_dev` is set.
+ """
+ for _, from_source in self._iter_script_sources():
+ script = from_source(script_type)
+ if not script:
+ continue
+ # A version mismatch is only warned about here; the script is not skipped
+ if script.version != self._SUPPORTED_VERSION and not self.is_dev:
+ self.logger.warning(
+ f'Challenge solver {script_type.value} script version {script.version} '
+ f'is not supported (source: {script.source.value}, supported version: {self._SUPPORTED_VERSION})')
+ # NOTE(review): the table stores a single hash string per variant while the fallback is a list,
+ # so `not in` below is a substring test when an entry exists - confirm `!=` was not intended
+ script_hashes = self._ALLOWED_HASHES[script.type].get(script.variant, [])
+ if script_hashes and script.hash not in script_hashes and not self.is_dev:
+ self.logger.warning(
+ f'Hash mismatch on challenge solver {script.type.value} script '
+ f'(source: {script.source.value}, hash: {script.hash})!{provider_bug_report_message(self)}')
+ else:
+ self.logger.debug(f'Using challenge solver {script.type.value} script v{script.version} (source: {script.source.value}, variant: {script.variant.value})')
+ return script
+
+ self._available = False
+ raise JsChallengeProviderRejectedRequest(f'No usable challenge solver {script_type.value} script available')
+
+ def _iter_script_sources(self) -> Generator[tuple[ScriptSource, callable]]:
+ """Yield (source, loader) pairs in the order in which _get_script tries them.
+
+ Each loader takes a ScriptType and returns a Script or None.
+ """
+ yield from [
+ (ScriptSource.PYPACKAGE, self._pypackage_source),
+ (ScriptSource.BINARY, self._binary_source),
+ (ScriptSource.CACHE, self._cached_source),
+ (ScriptSource.BUILTIN, self._builtin_source),
+ (ScriptSource.WEB, self._web_release_source)]
+
+ def _pypackage_source(self, script_type: ScriptType, /) -> Script | None:
+ """Load the script from the optional `yt_dlp_jsc` python package; None if it cannot be imported."""
+ try:
+ # Imported lazily since the package is an optional dependency
+ import yt_dlp_jsc as yt_dlp_ejs
+ except ImportError as e:
+ self.logger.trace(f'yt_dlp_ejs python package unavailable, reason: {e}')
+ return None
+ # TODO: fix API naming
+ # The package's `jsc()` provides the core script and `lib()` the lib script
+ code = yt_dlp_ejs.jsc() if script_type is ScriptType.CORE else yt_dlp_ejs.lib()
+ return Script(script_type, ScriptVariant.MINIFIED, ScriptSource.PYPACKAGE, yt_dlp_ejs.version, code)
+
+ def _binary_source(self, script_type: ScriptType, /) -> Script | None:
+     """Load the minified script bundled as a `yt_dlp` package resource.
+
+     Only applies when `sys.frozen` is truthy and `sys._MEIPASS` exists;
+     returns None otherwise or when the resource is missing.
+     """
+     if not (getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS')):
+         return None
+
+     resource_name = self._MIN_SCRIPT_FILENAMES[script_type]
+     if not importlib.resources.is_resource(yt_dlp, resource_name):
+         return None
+
+     bundled_code = importlib.resources.read_text(yt_dlp, self._MIN_SCRIPT_FILENAMES[script_type])
+     return Script(script_type, ScriptVariant.MINIFIED, ScriptSource.BINARY, self._SUPPORTED_VERSION, bundled_code)
+
+ def _cached_source(self, script_type: ScriptType, /) -> Script | None:
+     """Load the script that _web_release_source stored in the cache, if there is one."""
+     entry = self.ie.cache.load(self._CACHE_SECTION, script_type.value)
+     if not entry:
+         return None
+     return Script(script_type, ScriptVariant.MINIFIED, ScriptSource.CACHE, entry['version'], entry['code'])
+
+ def _builtin_source(self, script_type: ScriptType, /) -> Script | None:
+ """Load the unminified script via `load_script`; None if it yields nothing."""
+ # Passed to load_script, which calls it with the exception when reading fails
+ error_hook = lambda _: self.logger.warning(
+ f'Failed to read builtin challenge solver {script_type.value} script{provider_bug_report_message(self)}')
+ code = load_script(
+ self._SCRIPT_FILENAMES[script_type], error_hook=error_hook)
+ if code:
+ # TODO: strip internal header comments as to match published version
+ return Script(script_type, ScriptVariant.UNMINIFIED, ScriptSource.BUILTIN, self._SUPPORTED_VERSION, code)
+ return None
+
+ def _web_release_source(self, script_type: ScriptType, /) -> Script | None:
+     """Download the minified script from the GitHub release and store it in the cache.
+
+     Returns None when the 'ejs-github' external component is not enabled
+     or when the download yields nothing.
+     """
+     enabled_components = self.ie.get_param('download_ext_components', [])
+     if 'ejs-github' not in enabled_components:
+         self._report_ext_component_skipped('ejs-github', 'challenge solver script')
+         return None
+
+     url = f'https://github.com/{self._REPOSITORY}/releases/download/{self._SUPPORTED_VERSION}/{self._MIN_SCRIPT_FILENAMES[script_type]}'
+     note = f'[{self.logger.prefix}] Downloading challenge solver {script_type.value} script from {url}'
+     errnote = f'[{self.logger.prefix}] Failed to download challenge solver {script_type.value} script'
+     code = self.ie._download_webpage_with_retries(url, None, note, errnote, fatal=False)
+     if not code:
+         return None
+
+     # Keep a copy so that _cached_source can serve it next time
+     self.ie.cache.store(self._CACHE_SECTION, script_type.value, {
+         'version': self._SUPPORTED_VERSION,
+         'code': code,
+     })
+     return Script(script_type, ScriptVariant.MINIFIED, ScriptSource.WEB, self._SUPPORTED_VERSION, code)
+
+ # endregion: challenge solver script
+
+ @property
+ def runtime_info(self) -> JsRuntimeInfo | bool:
+     """Info of this provider's JS runtime, or False if it is missing or not supported."""
+     entry = self.ie._downloader._js_runtimes.get(self.JS_RUNTIME_NAME)
+     if entry and entry.info and entry.info.supported:
+         return entry.info
+     return False
+
+ def is_available(self, /) -> bool:
+     """False without a usable runtime; otherwise the `_available` flag."""
+     return self._available if self.runtime_info else False
+
+ def _report_ext_component_skipped(self, component: str, component_description: str):
+ """Warn that downloading an external component is disabled and how to enable it.
+
+ `component` is the value to pass to --download-ext-components and
+ `component_description` is the human-readable name used in the message.
+ """
+ self.logger.warning(
+ f'External {component_description} downloads are disabled. '
+ f'This may be required to solve JS challenges using {self.JS_RUNTIME_NAME} JS runtime. '
+ f'You can enable {component_description} downloads with "--download-ext-components {component}". '
+ f'For more information and alternatives, refer to {self._JCP_GUIDE_URL}')
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/__init__.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/__init__.py 2025-10-19 19:15:20.546654710 +0200
@@ -0,0 +1,12 @@
+import importlib.resources
+
+
+def load_script(filename, error_hook=None):
+    """Return the text of the script `filename` bundled in this package, or None.
+
+    None is returned without calling `error_hook` when the resource does not exist.
+    If looking up or reading the resource fails with OSError or ModuleNotFoundError,
+    `error_hook` (when given) is called with the exception and None is returned.
+    """
+    try:
+        # The existence check is inside the try block as well: it has to resolve
+        # the package first, which can fail in the same way as reading does
+        if not importlib.resources.is_resource(__package__, filename):
+            return None
+        return importlib.resources.read_text(__package__, filename)
+    except (OSError, ModuleNotFoundError) as e:  # FileNotFoundError is an OSError
+        if error_hook:
+            error_hook(e)
+        return None
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/bun.lib.js
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/bun.lib.js 2025-10-19 19:15:20.546873665 +0200
@@ -0,0 +1,3 @@
+// TODO: Generate this file automatically from bundle repo
+const [m, a] = await Promise.all([ import("meriyah@6.1.4"), import("astring@1.9.0") ]);
+export const lib = { meriyah: m, astring: a };
\ No newline at end of file
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/core.js
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/core.js 2025-10-19 19:15:20.547020263 +0200
@@ -0,0 +1,501 @@
+// This file is auto-generated from https://github.com/Grub4K/yt-dlp-jsc-deno
+// Do not edit, changes will be overwritten.
+// TODO: make this automatically updated
+var jsc = (function (meriyah, astring) {
+ 'use strict';
+
+ function matchesStructure(
+ obj,
+ structure,
+ ) {
+ if (Array.isArray(structure)) {
+ if (!Array.isArray(obj)) {
+ return false;
+ }
+ return (
+ structure.length === obj.length &&
+ structure.every((value, index) => matchesStructure(obj[index], value))
+ );
+ }
+ if (typeof structure === "object") {
+ if (!obj) {
+ return !structure;
+ }
+ if ("or" in structure) {
+ // Handle `{ or: [a, b] }`
+ return structure.or.some((node) => matchesStructure(obj, node));
+ }
+ for (const [key, value] of Object.entries(structure)) {
+ if (!matchesStructure(obj[key ], value)) {
+ return false;
+ }
+ }
+ return true;
+ }
+ return structure === obj;
+ }
+
+ function isOneOf(value, ...of) {
+ return of.includes(value );
+ }
+
+ function _optionalChain$2(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }
+
+
+ const logicalExpression = {
+ type: "ExpressionStatement",
+ expression: {
+ type: "LogicalExpression",
+ left: {
+ type: "Identifier",
+ },
+ right: {
+ type: "SequenceExpression",
+ expressions: [
+ {
+ type: "AssignmentExpression",
+ left: {
+ type: "Identifier",
+ },
+ operator: "=",
+ right: {
+ type: "CallExpression",
+ callee: {
+ type: "Identifier",
+ },
+ arguments: {
+ or: [
+ [
+ { type: "Literal" },
+ {
+ type: "CallExpression",
+ callee: {
+ type: "Identifier",
+ name: "decodeURIComponent",
+ },
+ arguments: [{ type: "Identifier" }],
+ optional: false,
+ },
+ ],
+ [
+ {
+ type: "CallExpression",
+ callee: {
+ type: "Identifier",
+ name: "decodeURIComponent",
+ },
+ arguments: [{ type: "Identifier" }],
+ optional: false,
+ },
+ ],
+ ],
+ },
+ optional: false,
+ },
+ },
+ {
+ type: "CallExpression",
+ },
+ ],
+ },
+ operator: "&&",
+ },
+ };
+
+ const identifier$1 = {
+ or: [{
+ type: "ExpressionStatement",
+ expression: {
+ type: "AssignmentExpression",
+ operator: "=",
+ left: {
+ type: "Identifier",
+ },
+ right: {
+ type: "FunctionExpression",
+ params: [{}, {}, {}],
+ },
+ },
+ }, {
+ type: "FunctionDeclaration",
+ params: [{}, {}, {}],
+ }],
+ } ;
+
+ function extract$1(
+ node,
+ ) {
+ if (
+ !matchesStructure(node, identifier$1 )
+ ) {
+ return null;
+ }
+ const block = (node.type === "ExpressionStatement" &&
+ node.expression.type === "AssignmentExpression" &&
+ node.expression.right.type === "FunctionExpression")
+ ? node.expression.right.body
+ : node.type === "FunctionDeclaration"
+ ? node.body
+ : null;
+ const relevantExpression = _optionalChain$2([block, 'optionalAccess', _ => _.body, 'access', _2 => _2.at, 'call', _3 => _3(-2)]);
+ if (!matchesStructure(relevantExpression, logicalExpression)) {
+ return null;
+ }
+ if (
+ _optionalChain$2([relevantExpression, 'optionalAccess', _4 => _4.type]) !== "ExpressionStatement" ||
+ relevantExpression.expression.type !==
+ "LogicalExpression" ||
+ relevantExpression.expression.right.type !==
+ "SequenceExpression" ||
+ relevantExpression.expression.right.expressions[0].type !==
+ "AssignmentExpression"
+ ) {
+ return null;
+ }
+ const call = relevantExpression.expression.right.expressions[0].right;
+ if (call.type !== "CallExpression" || call.callee.type !== "Identifier") {
+ return null;
+ }
+ // TODO: verify identifiers here
+ return {
+ type: "ArrowFunctionExpression",
+ params: [
+ {
+ type: "Identifier",
+ name: "sig",
+ },
+ ],
+ body: {
+ type: "CallExpression",
+ callee: {
+ type: "Identifier",
+ name: call.callee.name,
+ },
+ arguments: call.arguments.length === 1
+ ? [
+ {
+ type: "Identifier",
+ name: "sig",
+ },
+ ]
+ : [
+ call.arguments[0],
+ {
+ type: "Identifier",
+ name: "sig",
+ },
+ ],
+ optional: false,
+ },
+ async: false,
+ expression: false,
+ generator: false,
+ };
+ }
+
+ function _optionalChain$1(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }
+
+
+ const identifier = {
+ type: "VariableDeclaration",
+ kind: "var",
+ declarations: [
+ {
+ type: "VariableDeclarator",
+ id: {
+ type: "Identifier",
+ },
+ init: {
+ type: "ArrayExpression",
+ elements: [
+ {
+ type: "Identifier",
+ },
+ ],
+ },
+ },
+ ],
+ };
+
+ const catchBlockBody = [
+ {
+ type: "ReturnStatement",
+ argument: {
+ type: "BinaryExpression",
+ left: {
+ type: "MemberExpression",
+ object: {
+ type: "Identifier",
+ },
+ computed: true,
+ property: {
+ type: "Literal",
+ },
+ optional: false,
+ },
+ right: {
+ type: "Identifier",
+ },
+ operator: "+",
+ },
+ },
+ ] ;
+
+ function extract(
+ node,
+ ) {
+ if (!matchesStructure(node, identifier)) {
+ // Fallback search for try { } catch { return X[12] + Y }
+ let name = null;
+ let block = null;
+ switch (node.type) {
+ case "ExpressionStatement": {
+ if (
+ node.expression.type === "AssignmentExpression" &&
+ node.expression.left.type === "Identifier" &&
+ node.expression.right.type === "FunctionExpression" &&
+ node.expression.right.params.length === 1
+ ) {
+ name = node.expression.left.name;
+ block = node.expression.right.body;
+ }
+ break;
+ }
+ case "FunctionDeclaration": {
+ if (node.params.length === 1) {
+ name = _optionalChain$1([node, 'access', _ => _.id, 'optionalAccess', _2 => _2.name]);
+ block = node.body;
+ }
+ break;
+ }
+ }
+ if (!block || !name) {
+ return null;
+ }
+ const tryNode = block.body.at(-2);
+ if (
+ _optionalChain$1([tryNode, 'optionalAccess', _3 => _3.type]) !== "TryStatement" ||
+ _optionalChain$1([tryNode, 'access', _4 => _4.handler, 'optionalAccess', _5 => _5.type]) !== "CatchClause"
+ ) {
+ return null;
+ }
+ const catchBody = tryNode.handler.body.body;
+ if (matchesStructure(catchBody, catchBlockBody)) {
+ return makeSolverFuncFromName(name);
+ }
+ return null;
+ }
+
+ if (node.type !== "VariableDeclaration") {
+ return null;
+ }
+ const declaration = node.declarations[0];
+ if (
+ declaration.type !== "VariableDeclarator" || !declaration.init ||
+ declaration.init.type !== "ArrayExpression" ||
+ declaration.init.elements.length !== 1
+ ) {
+ return null;
+ }
+ const [firstElement] = declaration.init.elements;
+ if (!firstElement || firstElement.type !== "Identifier") {
+ return null;
+ }
+ return makeSolverFuncFromName(firstElement.name);
+ }
+
+ function makeSolverFuncFromName(name) {
+ return {
+ type: "ArrowFunctionExpression",
+ params: [
+ {
+ type: "Identifier",
+ name: "nsig",
+ },
+ ],
+ body: {
+ type: "CallExpression",
+ callee: {
+ type: "Identifier",
+ name: name,
+ },
+ arguments: [
+ {
+ type: "Identifier",
+ name: "nsig",
+ },
+ ],
+ optional: false,
+ },
+ async: false,
+ expression: false,
+ generator: false,
+ };
+ }
+
+ const setupNodes = meriyah.parse(`
+globalThis.XMLHttpRequest = { prototype: {} };
+const window = Object.assign(Object.create(null), globalThis);
+window.location = new URL("https://www.youtube.com/watch?v=yt-dlp-wins");
+const document = {};
+let self = globalThis;
+`).body;
+
+ function _optionalChain(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }
+ function preprocessPlayer(data) {
+ const ast = meriyah.parse(data);
+ const body = ast.body;
+
+ const block = (() => {
+ switch (body.length) {
+ case 1: {
+ const func = body[0];
+ if (
+ _optionalChain([func, 'optionalAccess', _ => _.type]) === "ExpressionStatement" &&
+ func.expression.type === "CallExpression" &&
+ func.expression.callee.type === "MemberExpression" &&
+ func.expression.callee.object.type === "FunctionExpression"
+ ) {
+ return func.expression.callee.object.body;
+ }
+ break;
+ }
+ case 2: {
+ const func = body[1];
+ if (
+ _optionalChain([func, 'optionalAccess', _2 => _2.type]) === "ExpressionStatement" &&
+ func.expression.type === "CallExpression" &&
+ func.expression.callee.type === "FunctionExpression"
+ ) {
+ const block = func.expression.callee.body;
+ // Skip `var window = this;`
+ block.body.splice(0, 1);
+ return block;
+ }
+ break;
+ }
+ }
+ throw "unexpected structure";
+ })();
+
+ const found = {
+ nsig: [] ,
+ sig: [] ,
+ };
+ const plainExpressions = block.body.filter((node) => {
+ const nsig = extract(node);
+ if (nsig) {
+ found.nsig.push(nsig);
+ }
+ const sig = extract$1(node);
+ if (sig) {
+ found.sig.push(sig);
+ }
+ if (node.type === "ExpressionStatement") {
+ if (node.expression.type === "AssignmentExpression") {
+ return true;
+ }
+ return node.expression.type === "Literal";
+ }
+ return true;
+ });
+ block.body = plainExpressions;
+
+ for (const [name, options] of Object.entries(found)) {
+ // TODO: this is cringe fix plz
+ const unique = new Set(options.map((x) => JSON.stringify(x)));
+ if (unique.size !== 1) {
+ const message = `found ${unique.size} ${name} function possibilities`;
+ throw (
+ message +
+ (unique.size ? `: ${options.map((x) => astring.generate(x)).join(", ")}` : "")
+ );
+ }
+ plainExpressions.push({
+ type: "ExpressionStatement",
+ expression: {
+ type: "AssignmentExpression",
+ operator: "=",
+ left: {
+ type: "MemberExpression",
+ computed: false,
+ object: {
+ type: "Identifier",
+ name: "_result",
+ },
+ property: {
+ type: "Identifier",
+ name: name,
+ },
+ },
+ right: options[0],
+ },
+ });
+ }
+
+ ast.body.splice(0, 0, ...setupNodes);
+
+ return astring.generate(ast);
+ }
+
+ function getFromPrepared(code)
+
+
+ {
+ const resultObj = { nsig: null, sig: null };
+ Function("_result", code)(resultObj);
+ return resultObj;
+ }
+
+ function main(input) {
+ const preprocessedPlayer = input.type === "player"
+ ? preprocessPlayer(input.player)
+ : input.preprocessed_player;
+ const solvers = getFromPrepared(preprocessedPlayer);
+
+ const responses = input.requests.map(
+ (input) => {
+ if (!isOneOf(input.type, "nsig", "sig")) {
+ return {
+ type: "error",
+ error: `Unknown request type: ${input.type}`,
+ };
+ }
+ const solver = solvers[input.type];
+ if (!solver) {
+ return {
+ type: "error",
+ error: `Failed to extract ${input.type} function`,
+ };
+ }
+ try {
+ return {
+ type: "result",
+ data: Object.fromEntries(
+ input.challenges.map((challenge) => [challenge, solver(challenge)]),
+ ),
+ };
+ } catch (error) {
+ return {
+ type: "error",
+ error: error instanceof Error
+ ? `${error.message}\n${error.stack}`
+ : `${error}`,
+ };
+ }
+ },
+ );
+
+ const output = {
+ type: "result",
+ responses,
+ };
+ if (input.type === "player" && input.output_preprocessed) {
+ output.preprocessed_player = preprocessedPlayer;
+ }
+ return output;
+ }
+
+ return main;
+
+})(meriyah, astring);
\ No newline at end of file
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/deno.lib.js
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_builtin/scripts/deno.lib.js 2025-10-19 19:15:20.547291668 +0200
@@ -0,0 +1,3 @@
+// TODO: Generate this file automatically from bundle repo
+const [m, a] = await Promise.all([ import("npm:meriyah@6.1.4"), import("npm:astring@1.9.0") ]);
+export const lib = { meriyah: m, astring: a };
\ No newline at end of file
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_director.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_director.py 2025-10-19 19:15:20.547595341 +0200
@@ -0,0 +1,234 @@
+from __future__ import annotations
+
+import dataclasses
+import typing
+from collections.abc import Iterable
+
+from yt_dlp.extractor.youtube.jsc._registry import (
+ _jsc_preferences,
+ _jsc_providers,
+)
+from yt_dlp.extractor.youtube.jsc.provider import (
+ JsChallengeProvider,
+ JsChallengeProviderError,
+ JsChallengeProviderRejectedRequest,
+ JsChallengeProviderResponse,
+ JsChallengeRequest,
+ JsChallengeResponse,
+ JsChallengeType,
+ NChallengeInput,
+ NChallengeOutput,
+ SigChallengeInput,
+ SigChallengeOutput,
+)
+from yt_dlp.extractor.youtube.pot._director import YoutubeIEContentProviderLogger, provider_display_list
+from yt_dlp.extractor.youtube.pot._provider import (
+ IEContentProviderLogger,
+)
+from yt_dlp.extractor.youtube.pot.provider import (
+ provider_bug_report_message,
+)
+
+if typing.TYPE_CHECKING:
+ from yt_dlp.extractor.youtube.jsc.provider import Preference as JsChallengePreference
+
+
+class JsChallengeRequestDirector:  # Routes JS challenge requests to registered providers, trying them in preference order
+
+ def __init__(self, logger: IEContentProviderLogger):
+ self.providers: dict[str, JsChallengeProvider] = {}
+ self.preferences: list[JsChallengePreference] = []
+ self.logger = logger
+
+ def register_provider(self, provider: JsChallengeProvider):
+ self.providers[provider.PROVIDER_KEY] = provider  # keyed by PROVIDER_KEY, so registering the same key again replaces the earlier provider
+
+ def register_preference(self, preference: JsChallengePreference):
+ self.preferences.append(preference)
+
+ def _get_providers(self, requests: list[JsChallengeRequest]) -> Iterable[JsChallengeProvider]:
+ """Yield available providers, highest summed preference first, for the given requests"""
+ preferences = {
+ provider: sum(pref(provider, requests) for pref in self.preferences)  # a provider's score is the sum over every registered preference callable
+ for provider in self.providers.values()
+ }
+ if self.logger.log_level <= self.logger.LogLevel.TRACE:
+ # calling is_available() for every JS Challenge provider upfront may have some overhead
+ self.logger.trace(f'JS Challenge Providers: {provider_display_list(self.providers.values())}')
+ self.logger.trace('JS Challenge Provider preferences for this request: {}'.format(', '.join(
+ f'{provider.PROVIDER_NAME}={pref}' for provider, pref in preferences.items())))
+
+ return (
+ provider for provider in sorted(
+ self.providers.values(), key=preferences.get, reverse=True)
+ if provider.is_available()  # evaluated lazily, one provider at a time, as the generator is consumed
+ )
+
+ def _handle_error(self, e: Exception, provider: JsChallengeProvider, requests: list[JsChallengeRequest]):
+ if isinstance(e, JsChallengeProviderRejectedRequest):  # a rejection is only traced; it is not reported as a failure
+ self.logger.trace(
+ f'JS Challenge Provider "{provider.PROVIDER_NAME}" rejected '
+ f'{"this request" if len(requests) == 1 else f"{len(requests)} requests"}, '
+ f'trying next available provider. Reason: {e}',
+ )
+ elif isinstance(e, JsChallengeProviderError):
+ if len(requests) == 1:
+ self.logger.warning(
+ f'Error solving {requests[0].type.value} challenge request using "{provider.PROVIDER_NAME}" provider: {e}.\n'
+ f' input = {requests[0].input}\n'
+ f' {(provider_bug_report_message(provider, before="") if not e.expected else "")}')  # bug-report hint is omitted for expected errors
+ else:
+ self.logger.warning(
+ f'Error solving {len(requests)} challenge requests using "{provider.PROVIDER_NAME}" provider: {e}.\n'
+ f' requests = {requests}\n'
+ f' {(provider_bug_report_message(provider, before="") if not e.expected else "")}')
+ else:  # any other exception type is treated as a provider bug and logged with its traceback
+ self.logger.error(
+ f'Unexpected error solving {len(requests)} challenge request(s) using "{provider.PROVIDER_NAME}" provider: {e!r}\n'
+ f' requests = {requests}\n'
+ f' {provider_bug_report_message(provider, before="")}', cause=e)
+
+ def bulk_solve(self, requests: list[JsChallengeRequest]) -> list[tuple[JsChallengeRequest, JsChallengeResponse]]:
+ """Solve requests using providers in preference order; returns (request, response) pairs for the solved requests only"""
+ if not self.providers:
+ self.logger.trace('No JS Challenge providers registered')
+ return []
+
+ results = []
+ next_requests = requests[:]  # working copy: solved requests are removed from it, the caller's list is left untouched
+
+ for provider in self._get_providers(next_requests):
+ if not next_requests:
+ break
+ self.logger.trace(
+ f'Attempting to solve {len(next_requests)} challenges using "{provider.PROVIDER_NAME}" provider')
+ try:
+ for response in provider.bulk_solve([dataclasses.replace(request) for request in next_requests]):  # providers receive copies of the request objects
+ if not validate_provider_response(response):
+ self.logger.warning(
+ f'JS Challenge Provider "{provider.PROVIDER_NAME}" returned an invalid response:\n'
+ f' response = {response!r}\n'
+ f' {provider_bug_report_message(provider, before="")}')
+ continue
+ if response.error:
+ self._handle_error(response.error, provider, [response.request])
+ continue
+ if (vr_msg := validate_response(response.response, response.request)) is not True:  # validators return True or an error string
+ self.logger.warning(
+ f'Invalid JS Challenge response received from "{provider.PROVIDER_NAME}" provider: {vr_msg or ""}\n'
+ f' response = {response.response}\n'
+ f' request = {response.request}\n'
+ f' {provider_bug_report_message(provider, before="")}')
+ continue
+ try:
+ next_requests.remove(response.request)  # list.remove compares by equality, so the provider's copy matches the pending entry
+ except ValueError:
+ self.logger.warning(
+ f'JS Challenge Provider "{provider.PROVIDER_NAME}" returned a response for an unknown request:\n'
+ f' request = {response.request}\n'
+ f' {provider_bug_report_message(provider, before="")}')
+ continue
+ results.append((response.request, response.response))
+ except Exception as e:  # the provider itself failed: log it and let the next provider try what is still pending
+ self._handle_error(e, provider, next_requests)
+ continue
+
+ if len(results) != len(requests):
+ self.logger.trace(
+ f'Not all JS Challenges were solved, expected {len(requests)} responses, got {len(results)}')
+ self.logger.trace(f'Unsolved requests: {next_requests}')
+ else:
+ self.logger.trace(f'Solved all {len(requests)} requested JS Challenges')
+ return results
+
+ def close(self):
+ for provider in self.providers.values():
+ provider.close()
+
+
+EXTRACTOR_ARG_PREFIX = 'youtubejsc'  # prefix of the per-provider extractor-args key built in initialize_jsc_director
+
+
+def initialize_jsc_director(ie):  # Build a JsChallengeRequestDirector for extractor `ie`, loaded with every registered provider and preference
+ assert ie._downloader is not None, 'Downloader not set'
+
+ enable_trace = ie._configuration_arg(  # `jsc_trace` extractor arg of the youtube extractor; defaults to 'false'
+ 'jsc_trace', ['false'], ie_key='youtube', casesense=False)[0] == 'true'
+
+ if enable_trace:
+ log_level = IEContentProviderLogger.LogLevel.TRACE
+ elif ie.get_param('verbose', False):
+ log_level = IEContentProviderLogger.LogLevel.DEBUG
+ else:
+ log_level = IEContentProviderLogger.LogLevel.INFO
+
+ def get_provider_logger_and_settings(provider, logger_key):  # returns (logger, settings dict) for one provider class
+ logger_prefix = f'{logger_key}:{provider.PROVIDER_NAME}'
+ extractor_key = f'{EXTRACTOR_ARG_PREFIX}-{provider.PROVIDER_KEY.lower()}'  # extractor-args namespace specific to this provider
+ return (
+ YoutubeIEContentProviderLogger(ie, logger_prefix, log_level=log_level),
+ ie.get_param('extractor_args', {}).get(extractor_key, {}))
+
+ director = JsChallengeRequestDirector(
+ logger=YoutubeIEContentProviderLogger(ie, 'jsc', log_level=log_level),
+ )
+
+ ie._downloader.add_close_hook(director.close)  # director.close is registered as a close hook on the downloader
+
+ for provider in _jsc_providers.value.values():
+ logger, settings = get_provider_logger_and_settings(provider, 'jsc')
+ director.register_provider(provider(ie, logger, settings))  # `provider` is a class here; it is instantiated with its own logger and settings
+
+ for preference in _jsc_preferences.value:
+ director.register_preference(preference)
+
+ if director.logger.log_level <= director.logger.LogLevel.DEBUG:
+ # calling is_available() for every JS Challenge provider upfront may have some overhead
+ director.logger.debug(f'JS Challenge Providers: {provider_display_list(director.providers.values())}')
+ director.logger.trace(f'Registered {len(director.preferences)} JS Challenge provider preferences')
+
+ return director
+
+
+def validate_provider_response(response: JsChallengeProviderResponse) -> bool:  # Structural check of what a provider yielded; the challenge output itself is not inspected here
+ return (
+ isinstance(response, JsChallengeProviderResponse)
+ and isinstance(response.request, JsChallengeRequest)
+ and (  # must carry either a response object or an exception
+ isinstance(response.response, JsChallengeResponse)
+ or (response.error is not None and isinstance(response.error, Exception)))  # isinstance() alone would already reject None
+ )
+
+
+def validate_response(response: JsChallengeResponse, request: JsChallengeRequest) -> bool | str:  # Returns True when valid, otherwise a string describing the problem
+ if not isinstance(response, JsChallengeResponse):
+ return 'Response is not a JsChallengeResponse'
+ if request.type == JsChallengeType.N:  # the validator is chosen from the request's type, not from response.type
+ return validate_nsig_challenge_output(response.output, request.input)
+ else:  # every type other than N is validated as a sig challenge
+ return validate_sig_challenge_output(response.output, request.input)
+
+
+def validate_nsig_challenge_output(challenge_output: NChallengeOutput, challenge_input: NChallengeInput) -> bool | str:  # Returns True when valid, otherwise an error message
+ if not (
+ isinstance(challenge_output, NChallengeOutput)
+ and len(challenge_output.results) == len(challenge_input.challenges)  # NOTE(review): duplicate challenges in the input would fail this length check - confirm callers de-duplicate
+ and all(isinstance(k, str) and isinstance(v, str) for k, v in challenge_output.results.items())
+ and all(challenge in challenge_output.results for challenge in challenge_input.challenges)  # every requested challenge must have a result
+ ):
+ return 'Invalid NChallengeOutput'
+
+ # Validate nsig results are valid - if they end with the input challenge then the js function returned with an exception.
+ for challenge, result in challenge_output.results.items():
+ if result.endswith(challenge):
+ return f'nsig result is invalid for {challenge!r}: {result!r}'
+ return True
+
+
+def validate_sig_challenge_output(challenge_output: SigChallengeOutput, challenge_input: SigChallengeInput) -> bool | str:  # Returns True when valid, otherwise an error message
+ return (
+ isinstance(challenge_output, SigChallengeOutput)
+ and len(challenge_output.results) == len(challenge_input.challenges)
+ and all(isinstance(k, str) and isinstance(v, str) for k, v in challenge_output.results.items())
+ and all(challenge in challenge_output.results for challenge in challenge_input.challenges)  # every requested challenge must have a result
+ ) or 'Invalid SigChallengeOutput'  # "(checks) or message": a failed check falls through to the message string
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/_registry.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/_registry.py 2025-10-19 19:15:20.547765127 +0200
@@ -0,0 +1,4 @@
+from yt_dlp.globals import Indirect
+
+_jsc_providers = Indirect({})  # registry of JS challenge provider classes
+_jsc_preferences = Indirect(set())  # registered preference callables; a set, so registration order is not kept
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/provider.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/provider.py 2025-10-19 19:15:20.547918360 +0200
@@ -0,0 +1,157 @@
+"""PUBLIC API"""
+
+from __future__ import annotations
+
+import abc
+import dataclasses
+import enum
+import typing
+
+from yt_dlp.extractor.youtube.jsc._registry import _jsc_preferences, _jsc_providers
+from yt_dlp.extractor.youtube.pot._provider import (
+ IEContentProvider,
+ IEContentProviderError,
+ register_preference_generic,
+ register_provider_generic,
+)
+from yt_dlp.utils import ExtractorError
+
+__all__ = [
+ 'JsChallengeProvider',
+ 'JsChallengeProviderError',
+ 'JsChallengeProviderRejectedRequest',
+ 'JsChallengeProviderResponse',
+ 'JsChallengeRequest',
+ 'JsChallengeResponse',
+ 'JsChallengeType',
+ 'NChallengeInput',
+ 'NChallengeOutput',
+ 'SigChallengeInput',
+ 'SigChallengeOutput',
+ 'register_preference',
+ 'register_provider',
+]
+
+
+class JsChallengeType(enum.Enum):  # Kinds of JS challenge a request can ask for
+ N = 'n'  # paired with NChallengeInput / NChallengeOutput
+ SIG = 'sig'  # paired with SigChallengeInput / SigChallengeOutput
+
+
+@dataclasses.dataclass(frozen=True)  # immutable; instances compare by field values
+class JsChallengeRequest:
+ type: JsChallengeType
+ input: NChallengeInput | SigChallengeInput  # NOTE(review): presumably must match `type` (N -> NChallengeInput, SIG -> SigChallengeInput) - not enforced here
+ video_id: str | None = None  # optional
+
+
+@dataclasses.dataclass(frozen=True)
+class NChallengeInput:
+ player_url: str
+ challenges: list[str] = dataclasses.field(default_factory=list)  # NOTE(review): a list field makes the generated __hash__ raise TypeError despite frozen=True
+
+
+@dataclasses.dataclass(frozen=True)
+class SigChallengeInput:
+ player_url: str
+ challenges: list[str] = dataclasses.field(default_factory=list)  # NOTE(review): a list field makes the generated __hash__ raise TypeError despite frozen=True
+
+
+@dataclasses.dataclass(frozen=True)
+class NChallengeOutput:
+ results: dict[str, str] = dataclasses.field(default_factory=dict)  # keys are the input challenge strings, values their solved results
+
+
+@dataclasses.dataclass(frozen=True)
+class SigChallengeOutput:
+ results: dict[str, str] = dataclasses.field(default_factory=dict)  # keys are the input challenge strings, values their solved results
+
+
+@dataclasses.dataclass
+class JsChallengeProviderResponse:  # What a provider yields for one request: either a response or an error
+ request: JsChallengeRequest  # the request this result belongs to
+ response: JsChallengeResponse | None = None
+ error: Exception | None = None  # NOTE(review): nothing here prevents both or neither of response/error being set
+
+
+@dataclasses.dataclass
+class JsChallengeResponse:  # Solved output for one request
+ type: JsChallengeType
+ output: NChallengeOutput | SigChallengeOutput  # NOTE(review): presumably matches `type` - not enforced here
+
+
+class JsChallengeProviderRejectedRequest(IEContentProviderError):
+ """Raised when a provider rejects a JsChallengeRequest it cannot handle"""
+
+
+class JsChallengeProviderError(IEContentProviderError):
+ """Raised when an error occurs while solving the challenge"""
+
+
+class JsChallengeProvider(IEContentProvider, abc.ABC, suffix='JCP'):  # Abstract base for JS challenge solvers; subclasses implement _real_bulk_solve
+
+ # Challenge types this provider accepts; the default () rejects every request. Set to None to disable the check
+ _SUPPORTED_TYPES: tuple[JsChallengeType, ...] | None = ()
+
+ def __validate_request(self, request: JsChallengeRequest):  # raises JsChallengeProviderRejectedRequest when the request cannot be taken
+ if not self.is_available():
+ raise JsChallengeProviderRejectedRequest(f'{self.PROVIDER_NAME} is not available')
+
+ # Validate request using built-in settings
+ if (
+ self._SUPPORTED_TYPES is not None
+ and request.type not in self._SUPPORTED_TYPES
+ ):
+ raise JsChallengeProviderRejectedRequest(
+ f'JS Challenge type "{request.type}" is not supported by {self.PROVIDER_NAME}')
+
+ def bulk_solve(self, requests: list[JsChallengeRequest]) -> typing.Generator[JsChallengeProviderResponse, None, None]:
+ """Validate each request, yield an error response for rejected ones, then yield the results for the rest"""
+ validated_requests = []
+ for request in requests:
+ try:
+ self.__validate_request(request)
+ validated_requests.append(request)
+ except JsChallengeProviderRejectedRequest as e:
+ yield JsChallengeProviderResponse(request=request, error=e)  # rejections are reported per request instead of aborting the batch
+ continue
+ yield from self._real_bulk_solve(validated_requests)  # NOTE: also invoked with an empty list when every request was rejected
+
+ @abc.abstractmethod
+ def _real_bulk_solve(self, requests: list[JsChallengeRequest]) -> typing.Generator[JsChallengeProviderResponse, None, None]:
+ """Subclasses must override this method to handle bulk solving"""
+ raise NotImplementedError(f'{self.PROVIDER_NAME} does not implement bulk solving')
+
+ def _get_player(self, video_id, player_url):  # loads the player via the extractor; ExtractorError is re-raised as JsChallengeProviderError
+ try:
+ return self.ie._load_player(
+ video_id=video_id,
+ player_url=player_url,
+ fatal=True,
+ )
+ except ExtractorError as e:
+ raise JsChallengeProviderError(
+ f'Failed to load player for JS challenge: {e}') from e
+
+
+def register_provider(provider: type[JsChallengeProvider]):
+ """Register a JsChallengeProvider class in the JS challenge provider registry"""
+ return register_provider_generic(  # the generic helper's return value is passed straight back to the caller
+ provider=provider,
+ base_class=JsChallengeProvider,
+ registry=_jsc_providers.value,
+ )
+
+
+def register_preference(*providers: type[JsChallengeProvider]) -> typing.Callable[[Preference], Preference]:
+ """Return a decorator that registers a preference callable for the given JsChallengeProvider classes."""
+ return register_preference_generic(
+ JsChallengeProvider,
+ _jsc_preferences.value,  # the set that preference callables are registered into
+ *providers,
+ )
+
+
+if typing.TYPE_CHECKING:  # everything below runs only under a type checker, never at runtime
+ Preference = typing.Callable[[JsChallengeProvider, list[JsChallengeRequest]], int]  # (provider, requests) -> int score
+ __all__.append('Preference')
Index: yt-dlp/yt_dlp/extractor/youtube/jsc/utils.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/extractor/youtube/jsc/utils.py 2025-10-19 19:15:20.548108679 +0200
@@ -0,0 +1 @@
+"""PUBLIC API"""
Index: yt-dlp/yt_dlp/extractor/youtube/pot/_director.py
===================================================================
--- yt-dlp.orig/yt_dlp/extractor/youtube/pot/_director.py 2025-10-19 19:15:07.672296399 +0200
+++ yt-dlp/yt_dlp/extractor/youtube/pot/_director.py 2025-10-19 19:15:20.548316738 +0200
@@ -6,6 +6,7 @@
import datetime as dt
import hashlib
import json
+import traceback
import typing
import urllib.parse
from collections.abc import Iterable
@@ -58,9 +59,9 @@
if self.log_level <= self.LogLevel.TRACE:
self.__ie.write_debug(self._format_msg('TRACE: ' + message))
- def debug(self, message: str):
+ def debug(self, message: str, *, once=False):
if self.log_level <= self.LogLevel.DEBUG:
- self.__ie.write_debug(self._format_msg(message))
+ self.__ie.write_debug(self._format_msg(message), only_once=once)
def info(self, message: str):
if self.log_level <= self.LogLevel.INFO:
@@ -70,9 +71,11 @@
if self.log_level <= self.LogLevel.WARNING:
self.__ie.report_warning(self._format_msg(message), only_once=once)
- def error(self, message: str):
+ def error(self, message: str, cause=None):
if self.log_level <= self.LogLevel.ERROR:
- self.__ie._downloader.report_error(self._format_msg(message), is_error=False)
+ self.__ie._downloader.report_error(
+ self._format_msg(message), is_error=False,
+ tb=''.join(traceback.format_exception(None, cause, cause.__traceback__)) if cause else None)
class PoTokenCache:
Index: yt-dlp/yt_dlp/extractor/youtube/pot/_provider.py
===================================================================
--- yt-dlp.orig/yt_dlp/extractor/youtube/pot/_provider.py 2025-10-19 19:15:07.672900183 +0200
+++ yt-dlp/yt_dlp/extractor/youtube/pot/_provider.py 2025-10-19 19:15:20.548594150 +0200
@@ -36,7 +36,7 @@
pass
@abc.abstractmethod
- def debug(self, message: str):
+ def debug(self, message: str, *, once=False):
pass
@abc.abstractmethod
@@ -48,7 +48,7 @@
pass
@abc.abstractmethod
- def error(self, message: str):
+ def error(self, message: str, cause=None):
pass
@@ -90,7 +90,7 @@
@classproperty
def PROVIDER_KEY(cls) -> str:
assert hasattr(cls, '_PROVIDER_KEY_SUFFIX'), 'Content Provider implementation must define a suffix for the provider key'
- assert cls.__name__.endswith(cls._PROVIDER_KEY_SUFFIX), f'PoTokenProvider class names must end with "{cls._PROVIDER_KEY_SUFFIX}"'
+ assert cls.__name__.endswith(cls._PROVIDER_KEY_SUFFIX), f'Class name must end with "{cls._PROVIDER_KEY_SUFFIX}"'
return cls.__name__[:-len(cls._PROVIDER_KEY_SUFFIX)]
@abc.abstractmethod
Index: yt-dlp/yt_dlp/globals.py
===================================================================
--- yt-dlp.orig/yt_dlp/globals.py 2025-10-19 19:15:07.643052484 +0200
+++ yt-dlp/yt_dlp/globals.py 2025-10-19 19:15:20.548781675 +0200
@@ -1,3 +1,4 @@
+from __future__ import annotations
import os
from collections import defaultdict
@@ -30,3 +31,8 @@
IN_CLI = Indirect(False)
LAZY_EXTRACTORS = Indirect(None) # `False`=force, `None`=disabled, `True`=enabled
WINDOWS_VT_MODE = Indirect(False if os.name == 'nt' else None)
+
+# JS Runtimes
+# If adding support for another runtime, register it here to allow `js_runtimes` option to accept it.
+# key is the runtime name, value is None or a JsRuntime subclass (internal-only)
+supported_js_runtimes = Indirect({})
Index: yt-dlp/yt_dlp/options.py
===================================================================
--- yt-dlp.orig/yt_dlp/options.py 2025-10-19 19:15:07.642290020 +0200
+++ yt-dlp/yt_dlp/options.py 2025-10-19 19:15:20.549034014 +0200
@@ -457,6 +457,41 @@
dest='plugin_dirs', action='store_const', const=[],
help='Clear plugin directories to search, including defaults and those provided by previous --plugin-dirs')
general.add_option(
+ '--js-runtimes',
+ metavar='RUNTIME[:PATH]',
+ dest='js_runtimes',
+ action='callback',
+ callback=_list_from_options_callback,
+ type='str',
+ callback_kwargs={'delim': None},
+ default=['deno'],
+ help=(
+ 'Additional JavaScript runtime to enable, with an optional path to the runtime location. '
+ 'This option can be used multiple times to enable multiple runtimes. '
+ 'Supported runtimes: deno, node, bun. By default, only "deno" runtime is enabled.'))
+ general.add_option(
+ '--no-js-runtimes',
+ dest='js_runtimes', action='store_const', const=[],
+ help='Clear JavaScript runtimes to enable, including defaults and those provided by previous --js-runtimes')
+ general.add_option(
+ '--download-ext-components',
+ metavar='COMPONENT',
+ dest='download_ext_components',
+ action='callback',
+ callback=_list_from_options_callback,
+ type='str',
+ callback_kwargs={'delim': None},
+ default=[],
+ help=(
+ 'Specify external components that yt-dlp is allowed to download when needed. '
+ 'You can use this option multiple times to allow multiple components. '
+ 'Supported values: npm (JavaScript dependencies from npm), ejs-github (official JS scripts from yt-dlp-ejs GitHub). '
+ 'By default, no external components are allowed.'))
+ general.add_option(
+ '--no-download-ext-components',
+ dest='download_ext_components', action='store_const', const=[],
+ help='Disallow downloading of all external components, including any previously allowed by --download-ext-components or defaults.')
+ general.add_option(
'--flat-playlist',
action='store_const', dest='extract_flat', const='in_playlist', default=False,
help=(
Index: yt-dlp/yt_dlp/utils/_jsruntime.py
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ yt-dlp/yt_dlp/utils/_jsruntime.py 2025-10-19 19:15:20.549398030 +0200
@@ -0,0 +1,57 @@
+from __future__ import annotations
+import abc
+import dataclasses
+import functools
+
+from ._utils import _get_exe_version_output, detect_exe_version
+
+
+@dataclasses.dataclass(frozen=True)  # immutable record describing one probed runtime
+class JsRuntimeInfo:
+ name: str
+ path: str  # the executable path or bare command name that was probed
+ version: str
+ supported: bool = True  # NOTE(review): meaning not visible here - presumably marks versions yt-dlp can use; confirm
+
+
+class JsRuntime(abc.ABC):  # Base class for probing a JS runtime executable; subclasses implement _info()
+ def __init__(self, path=None):
+ self._path = path  # optional explicit executable location; None lets the subclass pick its default command name
+
+ @functools.cached_property
+ def info(self) -> JsRuntimeInfo | None:  # computed on first access, then cached per instance (a None result is cached too)
+ return self._info()
+
+ @abc.abstractmethod
+ def _info(self) -> JsRuntimeInfo | None:  # returns None when the runtime could not be probed
+ raise NotImplementedError
+
+
+class DenoJsRuntime(JsRuntime):
+ def _info(self):  # probes `deno --version`; returns None when there is no output
+ deno_path = self._path or 'deno'  # default to the bare command name
+ out = _get_exe_version_output(deno_path, ['--version'])
+ if not out:
+ return None
+ version = detect_exe_version(out, r'^deno (\S+)')  # pattern expects output that starts with "deno <version>"
+ return JsRuntimeInfo(name='deno', path=deno_path, version=version)
+
+
+class BunJsRuntime(JsRuntime):
+ def _info(self):  # probes `bun --version`; returns None when there is no output
+ path = self._path or 'bun'  # default to the bare command name
+ out = _get_exe_version_output(path, ['--version'])
+ if not out:
+ return None
+ version = detect_exe_version(out, r'^(\S+)')  # pattern takes the first whitespace-free token of the output as the version
+ return JsRuntimeInfo(name='bun', path=path, version=version)
+
+
+class NodeJsRuntime(JsRuntime):
+ def _info(self):  # probes `node --version`; returns None when there is no output
+ node_path = self._path or 'node'  # default to the bare command name
+ out = _get_exe_version_output(node_path, ['--version'])
+ if not out:
+ return None
+ version = detect_exe_version(out, r'^v(\S+)')  # pattern expects output that starts with "v<version>"
+ return JsRuntimeInfo(name='node', path=node_path, version=version)