Files
raycast-api/src/raycast_api/discovery/extractors.py
T

303 lines
11 KiB
Python

"""High-level extractors that turn a parsed bundle into a `SigningSpec`.
The structural matchers in `ast_parse` give us candidate functions; the
extractors here verify those candidates against the documented shape of
`Sur`/`Nkt` (rot + HMAC signer) and tease out the few parameters that aren't
visually identical between minified builds:
- rot ranges and shifts (might rotate keys differently in future builds)
- canonical-string join character (currently ".")
- body-hash algorithm (currently "SHA-256")
- HMAC hash (currently "SHA-256")
- key encoding (currently utf-8 of the hex string AS-IS, per HANDOFF.md)
This produces a `SigningSpec` that the runtime signer consumes — `sign.py`'s
constants become `SigningSpec` fields, so the whole signing pipeline is
data-driven and re-derives on every Raycast update.
"""
from __future__ import annotations
import plistlib
from typing import TYPE_CHECKING, Any
from raycast_api.discovery.ast_parse import (
FunctionInfo,
find_calls,
find_function_by_shape,
has_string_literal,
iter_function_declarations,
walk_ast,
)
from raycast_api.errors import DiscoveryError
from raycast_api.signing_spec import RotRange, SigningSpec
if TYPE_CHECKING:
from pathlib import Path
__all__ = ["extract_signing_spec", "extract_user_agent_template", "read_app_version"]
def extract_signing_spec(bundle_source: str) -> SigningSpec:
    """Derive a `SigningSpec` from the raw bundle JS text.

    `bundle_source` is the JS source as shipped; no pre-beautified copy is
    required.

    Pipeline:

    1. Collect every function declaration that `iter_function_declarations`
       yields for the source.
    2. Resolve the (rot, signing) function pair via `_find_rot_and_signing`.
       The rot function is recognised by its numeric fingerprint (the
       `(65, 90, 13)`, `(97, 122, 13)`, `(48, 57, 5)` triplets plus the moduli
       26 and 10), and the signing function must pass that rot function by
       name to a `.map(...)` call.
    3. Read the tunable parameters off that pair: the canonical-string join
       character, the body digest algorithm, the HMAC hash and the rot ranges.

    `key_encoding` and `output_encoding` are not derived from the bundle; they
    are fixed to "utf-8" and "hex-lower" respectively.

    Raises:
        DiscoveryError: if the source yields no function declarations, or if
            any of the helper extractors cannot find what it is looking for.
    """
    declarations = list(iter_function_declarations(bundle_source))
    if not declarations:
        msg = "No function declarations found in bundle source"
        raise DiscoveryError(msg)

    rot_fn, signing_fn = _find_rot_and_signing(declarations)

    # Keyword arguments are evaluated left to right, so the extractors run in
    # the order join char -> digest -> HMAC hash -> rot ranges; the first one
    # that fails determines which DiscoveryError the caller sees.
    return SigningSpec(
        join_char=_extract_join_char(signing_fn, rot_fn_name=rot_fn.name),
        body_hash_algorithm=_extract_digest_algo(signing_fn),
        hmac_algorithm=_extract_hmac_algo(signing_fn),
        rot_ranges=_extract_rot_ranges(rot_fn),
        rot_fn_name=rot_fn.name,
        signing_fn_name=signing_fn.name,
        key_encoding="utf-8",
        output_encoding="hex-lower",
    )
def _find_rot_and_signing(fns: list[FunctionInfo]) -> tuple[FunctionInfo, FunctionInfo]:
    """Return the first (rot, signing) pair in which signing calls `.map(rot)`.

    Both sides can be ambiguous: the original notes say the bundle carries two
    byte-identical rot copies (`Sur` and `Tur`), and a future build could
    split the signing path into several functions. The tie-breaker is the
    signing function itself: a pair only counts when one of the identifiers
    the signer hands to `.map(...)` is the name of a rot candidate.

    Raises:
        DiscoveryError: if there is no rot candidate, no signing candidate, or
            no signing candidate that maps over any rot candidate.
    """

    def _mentions_hmac(candidate: FunctionInfo) -> bool:
        return has_string_literal(candidate, "HMAC")

    def _mentions_sha256(candidate: FunctionInfo) -> bool:
        return has_string_literal(candidate, "SHA-256")

    rot_matches = find_function_by_shape(
        fns, param_count=1, custom=[_has_required_rot_triplets]
    )
    if not rot_matches:
        msg = (
            "No rot13+rot5 candidate "
            "(1 param, all of (65,90,13)/(97,122,13)/(48,57,5))"
        )
        raise DiscoveryError(msg)

    # Index by name; if two candidates share a name the later one wins.
    rot_by_name: dict[str, FunctionInfo] = {}
    for candidate in rot_matches:
        rot_by_name[candidate.name] = candidate

    signers = find_function_by_shape(
        fns,
        is_async=True,
        param_count=4,
        body_contains_all=["HMAC", "SHA-256", "importKey"],
        custom=[_mentions_hmac, _mentions_sha256],
    )
    if not signers:
        msg = "No signing candidate (async, 4 params, HMAC+SHA-256+importKey)"
        raise DiscoveryError(msg)

    # Every signer is inspected (not just up to the first hit); each one
    # contributes at most one pair, built from its first matching identifier.
    pairs: list[tuple[FunctionInfo, FunctionInfo]] = []
    for signer in signers:
        matched = next(
            (
                ident
                for ident in _map_argument_identifiers(signer)
                if ident in rot_by_name
            ),
            None,
        )
        if matched is not None:
            pairs.append((rot_by_name[matched], signer))

    if pairs:
        return pairs[0]

    msg = (
        f"Found rot candidates {list(rot_by_name)} and signing candidates "
        f"{[s.name for s in signers]} but none of the signers calls "
        ".map(<rotName>)"
    )
    raise DiscoveryError(msg)
def _map_argument_identifiers(fn: FunctionInfo) -> list[str]:
    """Collect the identifier names used as the sole argument of `.map(...)`.

    Only calls returned by `find_calls(fn, "map")` are considered, and only
    those with exactly one argument whose node type is `Identifier`. Names are
    returned in the order the calls are reported; an Identifier node without
    a `name` key contributes an empty string.
    """
    found: list[str] = []
    for map_call in find_calls(fn, "map"):
        call_args = map_call.get("arguments", [])
        if len(call_args) != 1:
            continue
        only_arg = call_args[0]
        if not isinstance(only_arg, dict):
            continue
        if only_arg.get("type") != "Identifier":
            continue
        found.append(only_arg.get("name", ""))
    return found
def _has_required_rot_triplets(fn: FunctionInfo) -> bool:
    """Report whether fn carries the full rot13+rot5 numeric fingerprint.

    The check is purely on the set of integer literals found in the function:
    the range bounds 65/90, 97/122 and 48/57, the shifts 13 and 5, and the
    moduli 26 and 10 must all be present. The shape of the conditional chain
    (if/else, ternary, switch, ...) is deliberately not inspected; callers
    combine this with a 1-param shape filter to keep unrelated arithmetic out.
    """
    fingerprint = frozenset((65, 90, 97, 122, 48, 57, 13, 5, 26, 10))
    return fingerprint <= _collect_numeric_literals(fn)
def _collect_numeric_literals(fn: FunctionInfo) -> set[int]:
    """Return every integer-valued numeric literal found in fn's AST.

    Walks all nodes produced by `walk_ast(fn.ast)` and keeps the `value` of
    each `Literal` node when it is a whole number. Floats with an integral
    value (e.g. ``13.0``) are folded to ``int``; non-integral floats, NaN and
    infinities are ignored.

    Boolean literals are skipped on purpose: `bool` is a subclass of `int` in
    Python, so without the explicit check a JS `true`/`false` literal would
    be reported as the numbers 1/0.
    """
    out: set[int] = set()
    for node in walk_ast(fn.ast):
        if not (isinstance(node, dict) and node.get("type") == "Literal"):
            continue
        value = node.get("value")
        if isinstance(value, bool):
            # Not a numeric literal, even though isinstance(True, int) holds.
            continue
        if isinstance(value, int):
            # Already integral; avoid float() which overflows on huge ints.
            out.add(value)
        elif isinstance(value, float) and value.is_integer():
            out.add(int(value))
    return out
def _extract_rot_ranges(rot: FunctionInfo) -> list[RotRange]:  # noqa: ARG001 — kept for future structural derivation
    """Return the rot transform as a list of (start, end, shift) ranges.

    The ranges are currently fixed: they are the same three triplets the
    fingerprint matcher requires, so `rot` itself is not inspected. If a
    future build adds or drops a range, this is where the conditional chain
    would need to be walked to discover the ranges dynamically.
    """
    known_triplets = (
        (65, 90, 13),
        (97, 122, 13),
        (48, 57, 5),
    )
    return [
        RotRange(start=first, end=last, shift=offset)
        for first, last, offset in known_triplets
    ]
def _extract_join_char(fn: FunctionInfo, rot_fn_name: str) -> str:
    """Return the separator from `<arr>.map(<rotName>).join("<sep>")` in fn.

    The original notes say the signing function contains more than one
    `.map(...).join(...)` chain (the hex encoder joins on "", the
    canonical-string builder on "."). The chain wanted here is the one whose
    `.map` receives the rot function identifier as its only argument; the
    first such `.join` with a string-literal first argument wins.

    Raises:
        DiscoveryError: if no matching `.join(<str>)` call is found.
    """

    def _maps_over_rot(node: dict[str, Any]) -> bool:
        # True for a call node of the form `<something>.map(<rot_fn_name>)`.
        if node.get("type") != "CallExpression":
            return False
        target = node.get("callee", {})
        if target.get("type") != "MemberExpression":
            return False
        if target.get("property", {}).get("name") != "map":
            return False
        map_args = node.get("arguments", [])
        if len(map_args) != 1:
            return False
        only_arg = map_args[0]
        return (
            only_arg.get("type") == "Identifier"
            and only_arg.get("name") == rot_fn_name
        )

    for join_call in find_calls(fn, "join"):
        join_target = join_call.get("callee", {})
        if join_target.get("type") != "MemberExpression":
            continue
        if not _maps_over_rot(join_target.get("object", {})):
            continue
        join_args = join_call.get("arguments", [])
        if not join_args:
            continue
        separator_node = join_args[0]
        if separator_node.get("type") != "Literal":
            continue
        separator = separator_node.get("value")
        if isinstance(separator, str):
            return separator

    msg = f"Could not find `.map({rot_fn_name}).join(<str>)` in `{fn.name}`"
    raise DiscoveryError(msg)
def _extract_digest_algo(fn: FunctionInfo) -> str:
    """Return the algorithm string passed first to a `digest(...)` call in fn.

    Scans the calls reported by `find_calls(fn, "digest")` (the
    `crypto.subtle.digest("SHA-256", ...)` site in the signing function) and
    returns the first one whose first argument is a string literal.

    Raises:
        DiscoveryError: if no such call exists.
    """
    for digest_call in find_calls(fn, "digest"):
        call_args = digest_call.get("arguments", [])
        if not call_args:
            continue
        first_arg = call_args[0]
        if first_arg.get("type") != "Literal":
            continue
        algorithm = first_arg.get("value")
        if isinstance(algorithm, str):
            return algorithm

    msg = f"No crypto.subtle.digest(...) call in `{fn.name}`"
    raise DiscoveryError(msg)
def _extract_hmac_algo(fn: FunctionInfo) -> str:
    """Return the `hash` value of the `{name: "HMAC", hash: "<algo>"}` object.

    Rather than relying on the object being the third `importKey` argument,
    every `ObjectExpression` in the function is examined. For each one, the
    properties with an Identifier key and a Literal value are gathered; the
    first object whose `name` is "HMAC" and whose `hash` is a string provides
    the result.

    Raises:
        DiscoveryError: if no object of that shape is found.
    """
    for node in walk_ast(fn.ast):
        if not isinstance(node, dict) or node.get("type") != "ObjectExpression":
            continue

        literal_props: dict[str, Any] = {}
        for entry in node.get("properties", []):
            key_node = entry.get("key", {})
            value_node = entry.get("value", {})
            if key_node.get("type") != "Identifier":
                continue
            if value_node.get("type") != "Literal":
                continue
            literal_props[key_node["name"]] = value_node.get("value")

        if literal_props.get("name") != "HMAC":
            continue
        algorithm = literal_props.get("hash")
        if isinstance(algorithm, str):
            return algorithm

    msg = f"No {{name:'HMAC', hash:'...'}} object in `{fn.name}`"
    raise DiscoveryError(msg)
def read_app_version(app_path: Path) -> str:
    """Return `CFBundleShortVersionString` from the app's Info.plist.

    Args:
        app_path: Path to the `.app` bundle; the plist is read from
            `<app_path>/Contents/Info.plist`.

    Returns:
        The version string stored under `CFBundleShortVersionString`.

    Raises:
        DiscoveryError: if the plist file is missing, cannot be parsed, does
            not have a dictionary at its root, or has no string value for
            `CFBundleShortVersionString`.
    """
    # Local import: only needed to translate XML parse failures below.
    from xml.parsers.expat import ExpatError

    plist_path = app_path / "Contents" / "Info.plist"
    if not plist_path.is_file():
        msg = f"Missing Info.plist at {plist_path}"
        raise DiscoveryError(msg)

    try:
        with plist_path.open("rb") as f:
            plist = plistlib.load(f)
    except (ExpatError, ValueError) as err:
        # plistlib.InvalidFileException subclasses ValueError; malformed XML
        # plists surface as ExpatError. Report both the same way as every
        # other discovery failure instead of leaking parser exceptions.
        msg = f"Unreadable Info.plist at {plist_path}: {err}"
        raise DiscoveryError(msg) from err

    # A plist root may legally be an array or scalar; only a dict has keys.
    version = (
        plist.get("CFBundleShortVersionString") if isinstance(plist, dict) else None
    )
    if not isinstance(version, str):
        msg = f"No CFBundleShortVersionString in {plist_path}"
        raise DiscoveryError(msg)
    return version
def extract_user_agent_template(
    app_path: Path, *, platform: str = "macOS", platform_version: str | None = None
) -> str:
    """Build the `User-Agent` header value Raycast sends.

    The format (per BUNDLE_NOTES §6) is
    `Raycast/<version> (x-<platform> Version <ver>)`, where `<version>` is the
    app's `CFBundleShortVersionString`.

    Args:
        app_path: Path to the `.app` bundle whose version is read.
        platform: Platform label for the `x-<platform>` part. Defaults to
            "macOS" because the bundle is macOS-only; a future Windows build
            would pass a different label here.
        platform_version: OS version to report. When omitted it is looked up
            on the host at call time via `platform.mac_ver()`, falling back
            to "26.0" if that reports an empty release string.

    Raises:
        DiscoveryError: propagated from `read_app_version`.
    """
    # Aliased because the `platform` parameter shadows the stdlib module name.
    import platform as host_platform

    app_version = read_app_version(app_path)

    os_version = platform_version
    if os_version is None:
        detected = host_platform.mac_ver()[0]
        os_version = detected if detected else "26.0"

    return f"Raycast/{app_version} (x-{platform} Version {os_version})"