# File: claude-code-api/tests/test_normalizer.py (Python, 422 lines, 12 KiB)

"""Unit tests for Layer 3 (``event_normalizer.normalize``).

All fixtures are hand-built dicts shaped like real records observed under
``~/.claude/projects/``; no ``claude`` process is invoked. The normalizer is a
pure function, so every test is a one-shot
``normalize(record) -> Event | None`` assertion.
"""
from __future__ import annotations
from typing import Any
import pytest
from claude_code_api import (
AssistantMessage,
MessageParseError,
SystemMessage,
TextBlock,
ThinkingBlock,
ToolResultBlock,
ToolUseBlock,
UserMessage,
normalize,
)
# --- envelope metadata shared by every record observed in the wild ---------
_ENVELOPE: dict[str, Any] = {
"parentUuid": "parent-uuid",
"isSidechain": False,
"uuid": "rec-uuid",
"timestamp": "2026-05-16T20:17:27.664Z",
"userType": "external",
"entrypoint": "cli",
"cwd": "/some/cwd",
"sessionId": "sess-uuid",
"version": "2.1.143",
"gitBranch": "HEAD",
}
def _envelope(extra: dict[str, Any]) -> dict[str, Any]:
    """Return a fresh record: the shared envelope overlaid with ``extra``.

    Keys present in ``extra`` win over the envelope defaults. A new dict is
    built on every call, so neither ``_ENVELOPE`` nor ``extra`` is mutated.
    """
    record = dict(_ENVELOPE)
    record.update(extra)
    return record
# --- user records ----------------------------------------------------------
def test_user_string_content() -> None:
    """A user record with plain-string content normalizes to a UserMessage."""
    message = {"role": "user", "content": "hello there"}
    record = _envelope({"type": "user", "message": message})

    result = normalize(record)

    assert isinstance(result, UserMessage)
    assert result.content == "hello there"
    # Envelope identifiers are exposed under snake_case attribute names.
    assert result.uuid == "rec-uuid"
    assert result.session_id == "sess-uuid"
    assert result.parent_uuid == "parent-uuid"
def test_user_tool_result_content() -> None:
    """List content holding one ``tool_result`` becomes a ToolResultBlock list."""
    tool_result = {
        "type": "tool_result",
        "tool_use_id": "toolu_01",
        "content": "stdout body",
        "is_error": False,
    }
    message = {"role": "user", "content": [tool_result]}
    record = _envelope({"type": "user", "message": message})

    result = normalize(record)

    assert isinstance(result, UserMessage)
    assert isinstance(result.content, list)
    expected_block = ToolResultBlock(
        tool_use_id="toolu_01",
        content="stdout body",
        is_error=False,
    )
    assert result.content == [expected_block]
def test_user_meta_filtered_by_default() -> None:
    """With default arguments, a user record flagged ``isMeta`` yields None."""
    message = {"role": "user", "content": "<local-command-caveat>...</...>"}
    record = _envelope({"type": "user", "isMeta": True, "message": message})

    assert normalize(record) is None
def test_user_meta_emitted_when_opt_in() -> None:
    """Passing ``include_meta_user=True`` lets an ``isMeta`` user record through."""
    message = {"role": "user", "content": "x"}
    record = _envelope({"type": "user", "isMeta": True, "message": message})

    result = normalize(record, include_meta_user=True)

    assert isinstance(result, UserMessage)
    assert result.content == "x"
def test_user_missing_message_raises() -> None:
    """A user record carrying no ``message`` key raises MessageParseError."""
    record = _envelope({"type": "user"})

    with pytest.raises(MessageParseError, match="user record missing"):
        normalize(record)
def test_user_content_wrong_type_raises() -> None:
    """User content that is neither str nor list (here an int) is rejected."""
    bad_message = {"content": 42}
    record = _envelope({"type": "user", "message": bad_message})

    with pytest.raises(MessageParseError, match="content must be str or list"):
        normalize(record)
# --- assistant records -----------------------------------------------------
def test_assistant_text_only() -> None:
    """One text block plus message-level metadata all surface on the event."""
    message = {
        "model": "claude-opus-4-7",
        "id": "msg_01",
        "role": "assistant",
        "content": [{"type": "text", "text": "hi"}],
        "stop_reason": "end_turn",
        "usage": {"input_tokens": 1, "output_tokens": 2},
    }
    record = _envelope({"type": "assistant", "message": message})

    result = normalize(record)

    assert isinstance(result, AssistantMessage)
    assert result.content == [TextBlock(text="hi")]
    # Message-level fields are copied onto the event.
    assert result.model == "claude-opus-4-7"
    assert result.message_id == "msg_01"
    assert result.stop_reason == "end_turn"
    assert result.usage == {"input_tokens": 1, "output_tokens": 2}
def test_assistant_all_block_types() -> None:
    """Thinking, text and tool_use blocks each map to their typed block, in order."""
    raw_blocks = [
        {"type": "thinking", "thinking": "...", "signature": "sig"},
        {"type": "text", "text": "calling tool"},
        {
            "type": "tool_use",
            "id": "toolu_01",
            "name": "Bash",
            "input": {"command": "ls"},
        },
    ]
    message = {
        "model": "claude-opus-4-7",
        "role": "assistant",
        "content": raw_blocks,
        "stop_reason": "tool_use",
    }
    record = _envelope({"type": "assistant", "message": message})

    result = normalize(record)

    assert isinstance(result, AssistantMessage)
    assert result.content == [
        ThinkingBlock(thinking="...", signature="sig"),
        TextBlock(text="calling tool"),
        ToolUseBlock(id="toolu_01", name="Bash", input={"command": "ls"}),
    ]
    assert result.stop_reason == "tool_use"
def test_assistant_streaming_chunk_has_null_stop_reason() -> None:
    """A ``stop_reason`` of None on an assistant record is passed through as None."""
    # claude writes partial assistant records mid-turn with stop_reason=null;
    # the normalizer keeps the None so TurnManager can distinguish a partial
    # record from a terminal one.
    message = {
        "model": "claude-opus-4-7",
        "role": "assistant",
        "content": [{"type": "text", "text": "partial"}],
        "stop_reason": None,
    }
    record = _envelope({"type": "assistant", "message": message})

    result = normalize(record)

    assert isinstance(result, AssistantMessage)
    assert result.stop_reason is None
def test_assistant_missing_model_raises() -> None:
    """An assistant message lacking ``model`` raises MessageParseError."""
    message = {"role": "assistant", "content": []}
    record = _envelope({"type": "assistant", "message": message})

    with pytest.raises(MessageParseError, match="assistant record missing"):
        normalize(record)
def test_assistant_content_not_list_raises() -> None:
    """Assistant content given as a string instead of a list is rejected."""
    message = {
        "model": "claude-opus-4-7",
        "role": "assistant",
        "content": "not a list",
    }
    record = _envelope({"type": "assistant", "message": message})

    with pytest.raises(MessageParseError, match="content must be a list"):
        normalize(record)
def test_assistant_unknown_block_type_raises() -> None:
    """A content block with an unrecognised ``type`` (``image``) is rejected."""
    image_block = {"type": "image", "data": "..."}
    message = {
        "model": "claude-opus-4-7",
        "role": "assistant",
        "content": [image_block],
    }
    record = _envelope({"type": "assistant", "message": message})

    with pytest.raises(MessageParseError, match="unknown content block type"):
        normalize(record)
def test_assistant_tool_use_missing_id_raises() -> None:
    """A ``tool_use`` block that has no ``id`` raises MessageParseError."""
    tool_use_without_id = {"type": "tool_use", "name": "X", "input": {}}
    message = {
        "model": "claude-opus-4-7",
        "role": "assistant",
        "content": [tool_use_without_id],
    }
    record = _envelope({"type": "assistant", "message": message})

    with pytest.raises(MessageParseError, match="tool_use block missing"):
        normalize(record)
# --- system records --------------------------------------------------------
def test_system_turn_duration_surfaced() -> None:
    """A ``turn_duration`` system record is emitted as a SystemMessage."""
    fields = {
        "type": "system",
        "subtype": "turn_duration",
        "durationMs": 1234,
        "messageCount": 5,
        "isMeta": False,
    }
    record = _envelope(fields)

    result = normalize(record)

    assert isinstance(result, SystemMessage)
    assert result.subtype == "turn_duration"
    assert result.session_id == "sess-uuid"
    # `data` mirrors the full raw record, so callers can read `durationMs`
    # (and friends) straight off the event without re-parsing.
    assert result.data["durationMs"] == 1234
    assert result.data["messageCount"] == 5
def test_system_stop_hook_summary_filtered() -> None:
    """A ``stop_hook_summary`` system record normalizes to None."""
    fields = {
        "type": "system",
        "subtype": "stop_hook_summary",
        "hookCount": 0,
        "hookInfos": [],
    }
    record = _envelope(fields)

    assert normalize(record) is None
def test_system_local_command_filtered() -> None:
    """A ``local_command`` system record normalizes to None."""
    fields = {
        "type": "system",
        "subtype": "local_command",
        "content": "<local-command-stdout></local-command-stdout>",
    }
    record = _envelope(fields)

    assert normalize(record) is None
def test_system_missing_subtype_raises() -> None:
    """A system record with no ``subtype`` key raises MessageParseError."""
    record = _envelope({"type": "system"})

    with pytest.raises(MessageParseError, match="system record missing 'subtype'"):
        normalize(record)
# --- filtered top-level types ---------------------------------------------
@pytest.mark.parametrize(
    "record_type",
    [
        "attachment",
        "file-history-snapshot",
        "last-prompt",
        "ai-title",
        "permission-mode",
        "queue-operation",
    ],
)
def test_bookkeeping_types_filtered(record_type: str) -> None:
    """Each listed top-level record type normalizes to None."""
    record = _envelope({"type": record_type})

    result = normalize(record)

    assert result is None
def test_unknown_type_silently_dropped() -> None:
    """An unrecognised top-level ``type`` yields None rather than an error."""
    # Forward compatibility: a brand-new top-level record type introduced by a
    # future claude version must be dropped, not raised on.
    record = _envelope({"type": "some-new-record-type"})

    result = normalize(record)

    assert result is None
# --- error path ------------------------------------------------------------
def test_non_dict_record_raises() -> None:
    """Passing a non-dict (here a str) as the record raises MessageParseError."""
    not_a_record = "not a dict"

    with pytest.raises(MessageParseError, match="must be a dict"):
        normalize(not_a_record)  # type: ignore[arg-type]
def test_record_missing_type_raises() -> None:
    """A record made of envelope keys only, with no ``type``, is rejected."""
    envelope_only = _envelope({})

    with pytest.raises(MessageParseError, match="record missing 'type'"):
        normalize(envelope_only)
# --- regression fixtures from real session ---------------------------------
def test_real_user_string_record() -> None:
    """Regression: a user prompt record copied from a real 2.1.143 session.

    The record is spelled out in full rather than built with ``_envelope``.
    Its ``parentUuid`` is None and must come through as ``parent_uuid is None``.
    """
    record = {
        "parentUuid": None,
        "isSidechain": False,
        "promptId": "364db1ee-f587-4096-bc6c-0dc4323512dc",
        "type": "user",
        "message": {"role": "user", "content": "What is my name?"},
        "uuid": "97968a26-6466-4410-84db-2077e65573e1",
        "timestamp": "2026-05-16T20:17:27.664Z",
        "userType": "external",
        "entrypoint": "cli",
        "cwd": "/Users/h/projects/playgrounds/claude-code-sdk",
        "sessionId": "4df01eee-6026-4782-bdba-d67ab47a3e5b",
        "version": "2.1.143",
        "gitBranch": "HEAD",
    }

    result = normalize(record)

    assert isinstance(result, UserMessage)
    assert result.content == "What is my name?"
    assert result.parent_uuid is None
def test_real_assistant_tool_use_record() -> None:
    """Regression: a real assistant record that ended with ``stop_reason=tool_use``.

    Checks the stop reason, that the cache-related usage counters are kept,
    and that the second content block is the ``Bash`` tool call.
    """
    message = {
        "model": "claude-opus-4-7",
        "id": "msg_019Sy3eBbN24Y6YwgxuMvN7g",
        "type": "message",
        "role": "assistant",
        "content": [
            {"type": "thinking", "thinking": "...", "signature": "sig"},
            {
                "type": "tool_use",
                "id": "toolu_01XCXcKt7TaDbAKscRPpvumi",
                "name": "Bash",
                "input": {"command": "ls"},
            },
        ],
        "stop_reason": "tool_use",
        "usage": {
            "input_tokens": 6,
            "cache_creation_input_tokens": 11211,
            "cache_read_input_tokens": 17654,
            "output_tokens": 172,
        },
    }
    record = {
        "parentUuid": "97968a26-6466-4410-84db-2077e65573e1",
        "isSidechain": False,
        "message": message,
        "requestId": "req_011Cb6s6f7fhCRgo2yhNZY9G",
        "type": "assistant",
        "uuid": "14e394aa-9faa-4448-8a6c-1365bf2acb8a",
        "sessionId": "4df01eee-6026-4782-bdba-d67ab47a3e5b",
    }

    result = normalize(record)

    assert isinstance(result, AssistantMessage)
    assert result.stop_reason == "tool_use"
    assert result.usage is not None
    assert result.usage["cache_read_input_tokens"] == 17654
    assert len(result.content) == 2
    # The thinking block comes first, so the tool call sits at index 1.
    tool_call = result.content[1]
    assert isinstance(tool_call, ToolUseBlock)
    assert tool_call.name == "Bash"