# claude-code-api/tests/test_injection.py

"""Unit tests for `history_injection` helpers.

Pure functions, no claude / no filesystem. The seed-JSONL shape is regression
tested against the format claude 2.1.147 itself writes (verified empirically
against real session transcripts).
"""

from __future__ import annotations

import json

import pytest

from claude_code_api.injection import (
    build_concat_prompt,
    build_seed_jsonl,
    hash_history,
)

# --- hash_history ---------------------------------------------------------


def test_hash_history_empty_is_stable() -> None:
    """Hashing the empty history twice must give the same value."""
    first = hash_history([])
    second = hash_history([])
    assert first == second


def test_hash_history_distinguishes_content() -> None:
    """Histories that differ only in the message text must hash differently."""
    hi_digest, bye_digest = (
        hash_history([{"role": "user", "content": text}])
        for text in ("hi", "bye")
    )
    assert hi_digest != bye_digest


def test_hash_history_ignores_block_key_order() -> None:
    """The same block serialized with a different key order hashes equal.

    Clients may emit a block's keys in any order; the hash is expected to be
    computed over a canonical JSON form so both spellings agree.
    """

    def as_history(block: dict) -> list[dict]:
        # Wrap a single content block in a one-message assistant history.
        return [{"role": "assistant", "content": [block]}]

    forward = {"type": "tool_use", "id": "t1", "name": "echo", "input": {"x": 1}}
    backward = {"input": {"x": 1}, "name": "echo", "id": "t1", "type": "tool_use"}

    assert hash_history(as_history(forward)) == hash_history(as_history(backward))


def test_hash_history_rejects_unknown_role() -> None:
    """A message with an unsupported role raises ValueError mentioning 'role'."""
    bad_history = [{"role": "system", "content": "x"}]
    with pytest.raises(ValueError, match="role"):
        hash_history(bad_history)


def test_hash_history_text_blocks_collide_with_string_form() -> None:
    """String content and its single-text-block equivalent hash DIFFERENTLY.

    NOTE(review): despite "collide" in the test name, the assertion pins the
    opposite — the two forms must NOT produce the same hash. They carry the
    same semantic content but differ on the wire, so the gateway has to pick
    one form per role and stick with it; no normalization is attempted here.
    """
    as_string = [{"role": "user", "content": "hello"}]
    as_block = [{"role": "user", "content": [{"type": "text", "text": "hello"}]}]

    string_digest = hash_history(as_string)
    block_digest = hash_history(as_block)
    assert string_digest != block_digest


# --- build_seed_jsonl -----------------------------------------------------


def _records(seed: str) -> list[dict]:
    """Parse a JSONL seed string into one decoded object per line.

    Leading/trailing whitespace is stripped first; an empty (or
    whitespace-only) seed yields an empty list.

    Lines are split on LF only. ``str.splitlines`` would also break on
    U+2028, U+2029 and U+0085, which are legal unescaped characters inside a
    JSON string value (e.g. when serialized with ``ensure_ascii=False``), so
    it could cut a record in half. A trailing CR from CRLF line endings is
    harmless because the JSON decoder ignores surrounding whitespace.

    Raises:
        json.JSONDecodeError: if any line (including an interior blank line)
            is not valid JSON.
    """
    text = seed.strip()
    if not text:
        return []
    return [json.loads(line) for line in text.split("\n")]


def test_build_seed_jsonl_empty_is_empty_string() -> None:
    """An empty history produces an empty seed string, not a header-only file."""
    seed = build_seed_jsonl([], session_id="s", cwd="/tmp")
    assert seed == ""


def test_build_seed_jsonl_starts_with_permission_mode() -> None:
    """The first record of a non-empty seed is a `permission-mode` record.

    claude 2.1.147 writes one at session start and expects it on resume.
    """
    history = [{"role": "user", "content": "hi"}]
    seed = build_seed_jsonl(history, session_id="sid-1", cwd="/work")

    head = _records(seed)[0]
    assert head["type"] == "permission-mode"
    assert head["sessionId"] == "sid-1"
    assert head["permissionMode"] == "bypassPermissions"


def test_build_seed_jsonl_snapshot_precedes_each_user_prompt() -> None:
    """Each user prompt is paired with a `file-history-snapshot` record.

    The snapshot's `messageId` (both top-level and inside `snapshot`) must
    equal the `uuid` of the corresponding user record; resume parsing relies
    on that pairing.
    """
    history = [
        {"role": "user", "content": "u1"},
        {"role": "assistant", "content": "a1"},
        {"role": "user", "content": "u2"},
        {"role": "assistant", "content": "a2"},
    ]
    seed = build_seed_jsonl(history, session_id="s", cwd="/tmp")

    # Bucket the records of interest in a single pass, preserving file order.
    snapshots: list[dict] = []
    users: list[dict] = []
    for rec in _records(seed):
        kind = rec.get("type")
        if kind == "file-history-snapshot":
            snapshots.append(rec)
        elif kind == "user":
            users.append(rec)

    assert len(snapshots) == 2
    assert len(users) == 2
    for snap, usr in zip(snapshots, users, strict=True):
        assert snap["messageId"] == usr["uuid"]
        assert snap["snapshot"]["messageId"] == usr["uuid"]


def test_build_seed_jsonl_user_record_has_permission_mode_no_isMeta() -> None:
    """User records carry `permissionMode` and omit `isMeta`.

    This mirrors what claude 2.1.147 writes; an `isMeta` key on user records
    was one of the things that made the old seed look wrong on strict resume.
    """
    history = [{"role": "user", "content": "hi"}]
    parsed = _records(build_seed_jsonl(history, session_id="s", cwd="/tmp"))

    user_rec = next(filter(lambda rec: rec.get("type") == "user", parsed))
    assert user_rec["permissionMode"] == "bypassPermissions"
    assert "isMeta" not in user_rec
    assert user_rec["version"] == "2.1.147"
    assert user_rec["gitBranch"] == "HEAD"
    assert "promptId" in user_rec


def test_build_seed_jsonl_assistant_string_content_wraps_as_text_block() -> None:
    """A plain-string assistant reply is emitted as a single text block.

    Also pins the surrounding message envelope: `end_turn` stop reason, a
    null `diagnostics`, and the `server_tool_use` / `cache_creation` usage keys.
    """
    history = [
        {"role": "user", "content": "u"},
        {"role": "assistant", "content": "Got it."},
    ]
    seed = build_seed_jsonl(history, session_id="s", cwd="/tmp")

    assistant_records = list(
        filter(lambda rec: rec.get("type") == "assistant", _records(seed))
    )
    assert len(assistant_records) == 1

    message = assistant_records[0]["message"]
    assert message["content"] == [{"type": "text", "text": "Got it."}]
    assert message["stop_reason"] == "end_turn"
    assert message["diagnostics"] is None
    for usage_key in ("server_tool_use", "cache_creation"):
        assert usage_key in message["usage"]


def test_build_seed_jsonl_splits_assistant_blocks_into_separate_records() -> None:
    """One assistant message with N blocks becomes N assistant records.

    The records share the same message id and `requestId`, and each one's
    `parentUuid` points at the record before it.
    """
    thinking_block = {"type": "thinking", "thinking": "ponder", "signature": "sig"}
    tool_use_block = {"type": "tool_use", "id": "tu1", "name": "bash", "input": {}}
    history = [
        {"role": "user", "content": "u"},
        {"role": "assistant", "content": [thinking_block, tool_use_block]},
    ]
    seed = build_seed_jsonl(history, session_id="s", cwd="/tmp")

    assistant_records = [
        rec for rec in _records(seed) if rec.get("type") == "assistant"
    ]
    assert len(assistant_records) == 2
    first, second = assistant_records

    assert first["message"]["id"] == second["message"]["id"]
    assert first["requestId"] == second["requestId"]
    assert second["parentUuid"] == first["uuid"]
    assert first["message"]["content"][0]["type"] == "thinking"
    assert second["message"]["content"][0]["type"] == "tool_use"
    assert first["message"]["stop_reason"] == "tool_use"


def test_build_seed_jsonl_tool_result_parents_to_matching_tool_use() -> None:
    """A `tool_result` user record links back to the emitting assistant record.

    Both `parentUuid` and `sourceToolAssistantUUID` must equal the uuid of
    the assistant record that carried the matching `tool_use`.
    """
    history = [
        {"role": "user", "content": "u"},
        {
            "role": "assistant",
            "content": [{"type": "tool_use", "id": "tu1", "name": "bash", "input": {}}],
        },
        {
            "role": "user",
            "content": [{"type": "tool_result", "tool_use_id": "tu1", "content": "ok"}],
        },
    ]
    parsed = _records(build_seed_jsonl(history, session_id="s", cwd="/tmp"))

    assistant_rec = next(rec for rec in parsed if rec.get("type") == "assistant")
    user_records = [rec for rec in parsed if rec.get("type") == "user"]
    # Index 0 is the opening prompt; index 1 is the tool_result record.
    result_rec = user_records[1]

    emitter_uuid = assistant_rec["uuid"]
    assert result_rec["parentUuid"] == emitter_uuid
    assert result_rec["sourceToolAssistantUUID"] == emitter_uuid
    assert result_rec["toolUseResult"] == "ok"
    assert result_rec["message"]["content"] == [
        {"type": "tool_result", "tool_use_id": "tu1", "content": "ok"}
    ]


def test_build_seed_jsonl_splits_multi_tool_result_user_message() -> None:
    """A user message with several tool_result blocks yields one record each.

    Every resulting record must reference, via `sourceToolAssistantUUID`, the
    assistant record whose `tool_use` it answers.
    """

    def tool_call(tool_id: str, tool_name: str) -> dict:
        # One assistant message carrying a single tool_use block.
        return {
            "role": "assistant",
            "content": [
                {"type": "tool_use", "id": tool_id, "name": tool_name, "input": {}}
            ],
        }

    history = [
        {"role": "user", "content": "u"},
        tool_call("A", "x"),
        tool_call("B", "y"),
        {
            "role": "user",
            "content": [
                {"type": "tool_result", "tool_use_id": "A", "content": "a"},
                {"type": "tool_result", "tool_use_id": "B", "content": "b"},
            ],
        },
    ]
    parsed = _records(build_seed_jsonl(history, session_id="s", cwd="/tmp"))

    def is_tool_result(rec: dict) -> bool:
        # A user record whose first content block is a tool_result.
        if rec.get("type") != "user":
            return False
        content = rec["message"]["content"]
        if not isinstance(content, list):
            return False
        return content[0].get("type") == "tool_result"

    assistant_records = [rec for rec in parsed if rec.get("type") == "assistant"]
    result_records = [rec for rec in parsed if is_tool_result(rec)]
    assert len(result_records) == 2

    def emitter_uuid(tool_id: str) -> str:
        # uuid of the first assistant record whose leading block has this id.
        return next(
            rec["uuid"]
            for rec in assistant_records
            if rec["message"]["content"][0].get("id") == tool_id
        )

    uuid_a = emitter_uuid("A")
    uuid_b = emitter_uuid("B")
    assert result_records[0]["sourceToolAssistantUUID"] == uuid_a
    assert result_records[1]["sourceToolAssistantUUID"] == uuid_b


def test_build_seed_jsonl_chains_parent_uuids_linearly() -> None:
    """User/assistant records form a single linear `parentUuid` chain.

    The first such record has a null parent; every later one points at the
    record immediately before it.
    """
    history = [
        {"role": "user", "content": "u1"},
        {"role": "assistant", "content": "a1"},
        {"role": "user", "content": "u2"},
        {"role": "assistant", "content": "a2"},
    ]
    seed = build_seed_jsonl(history, session_id="s", cwd="/tmp")

    content_types = ("user", "assistant")
    chain = [rec for rec in _records(seed) if rec.get("type") in content_types]

    root = chain[0]
    assert root["parentUuid"] is None
    # The two slices always have equal length, so strict pairing is safe.
    for parent, child in zip(chain[:-1], chain[1:], strict=True):
        assert child["parentUuid"] == parent["uuid"]


def test_build_seed_jsonl_rejects_unknown_role() -> None:
    """An unsupported role in the history raises ValueError mentioning 'role'."""
    bad_history = [{"role": "system", "content": "x"}]
    with pytest.raises(ValueError, match="role"):
        build_seed_jsonl(bad_history, session_id="s", cwd="/tmp")


# --- build_concat_prompt --------------------------------------------------


def test_build_concat_prompt_empty_history_returns_just_last_user() -> None:
    """With no prior history the prompt is the new user message, unchanged."""
    prompt = build_concat_prompt([], "hello")
    assert prompt == "hello"


def test_build_concat_prompt_renders_alternating_history() -> None:
    """Prior turns are rendered with role labels, followed by the new prompt."""
    history = [
        {"role": "user", "content": "u1"},
        {"role": "assistant", "content": "a1"},
        {"role": "user", "content": "u2"},
        {"role": "assistant", "content": "a2"},
    ]
    out = build_concat_prompt(history, "u3")

    expected_fragments = (
        "Previous conversation context:",
        "[User]: u1",
        "[Assistant]: a1",
        "[User]: u2",
        "[Assistant]: a2",
        "Continue from here. New user message: u3",
    )
    for fragment in expected_fragments:
        assert fragment in out

    # The new prompt must come after the history, not interleaved.
    last_history_pos = out.index("[Assistant]: a2")
    continuation_pos = out.index("Continue from here")
    assert last_history_pos < continuation_pos


def test_build_concat_prompt_flattens_text_blocks_and_skips_tools() -> None:
    """List-form content is flattened to its text; tool blocks are dropped.

    Tool blocks don't round-trip through stdin in any useful form, so the two
    text blocks around the `tool_use` must end up adjacent in the output.
    """
    mixed_content = [
        {"type": "text", "text": "hello"},
        {"type": "tool_use", "id": "t1", "name": "x", "input": {}},
        {"type": "text", "text": "world"},
    ]
    history = [{"role": "assistant", "content": mixed_content}]

    out = build_concat_prompt(history, "ping")
    assert "[Assistant]: hello world" in out