"""Unit tests for `history_injection` helpers. Pure functions, no claude / no filesystem. The seed-JSONL shape is regression tested against the format claude 2.1.147 itself writes (verified empirically against real session transcripts). """ from __future__ import annotations import json import pytest from claude_code_api.injection import ( build_concat_prompt, build_seed_jsonl, hash_history, ) # --- hash_history --------------------------------------------------------- def test_hash_history_empty_is_stable() -> None: assert hash_history([]) == hash_history([]) def test_hash_history_distinguishes_content() -> None: a = [{"role": "user", "content": "hi"}] b = [{"role": "user", "content": "bye"}] assert hash_history(a) != hash_history(b) def test_hash_history_ignores_block_key_order() -> None: """Two clients that serialize the same block in different key orders must collide. Canonical-JSON serialization handles this.""" a = [ { "role": "assistant", "content": [{"type": "tool_use", "id": "t1", "name": "echo", "input": {"x": 1}}], } ] b = [ { "role": "assistant", "content": [{"input": {"x": 1}, "name": "echo", "id": "t1", "type": "tool_use"}], } ] assert hash_history(a) == hash_history(b) def test_hash_history_rejects_unknown_role() -> None: with pytest.raises(ValueError, match="role"): hash_history([{"role": "system", "content": "x"}]) def test_hash_history_text_blocks_collide_with_string_form() -> None: """A bare string `content` and the equivalent single text block hash to DIFFERENT values. They represent the same semantic content but appear on the wire differently — the gateway must pick one form per role and stay consistent. We don't try to paper over that here.""" a = [{"role": "user", "content": "hello"}] b = [{"role": "user", "content": [{"type": "text", "text": "hello"}]}] assert hash_history(a) != hash_history(b) # --- build_seed_jsonl ----------------------------------------------------- def _records(seed: str) -> list[dict]: return [json.loads(line) for line in seed.strip().splitlines()] def test_build_seed_jsonl_empty_is_empty_string() -> None: assert build_seed_jsonl([], session_id="s", cwd="/tmp") == "" def test_build_seed_jsonl_starts_with_permission_mode() -> None: """Non-empty seed must lead with a `permission-mode` record — claude 2.1.147 writes one at session start and expects it on resume.""" seed = build_seed_jsonl( [{"role": "user", "content": "hi"}], session_id="sid-1", cwd="/work", ) recs = _records(seed) assert recs[0]["type"] == "permission-mode" assert recs[0]["sessionId"] == "sid-1" assert recs[0]["permissionMode"] == "bypassPermissions" def test_build_seed_jsonl_snapshot_precedes_each_user_prompt() -> None: """Before every new user prompt, claude writes a `file-history-snapshot` whose `messageId` matches the user record's `uuid`. Resume parsing depends on this pairing.""" seed = build_seed_jsonl( [ {"role": "user", "content": "u1"}, {"role": "assistant", "content": "a1"}, {"role": "user", "content": "u2"}, {"role": "assistant", "content": "a2"}, ], session_id="s", cwd="/tmp", ) recs = _records(seed) snapshots = [r for r in recs if r.get("type") == "file-history-snapshot"] users = [r for r in recs if r.get("type") == "user"] assert len(snapshots) == 2 assert len(users) == 2 for snap, usr in zip(snapshots, users, strict=True): assert snap["messageId"] == usr["uuid"] assert snap["snapshot"]["messageId"] == usr["uuid"] def test_build_seed_jsonl_user_record_has_permission_mode_no_isMeta() -> None: """User records carry `permissionMode` but no `isMeta` — that's what claude 2.1.147 writes. Adding `isMeta` to user records is one of the things that made the old seed look 'wrong' on strict resume.""" seed = build_seed_jsonl( [{"role": "user", "content": "hi"}], session_id="s", cwd="/tmp", ) user_rec = next(r for r in _records(seed) if r.get("type") == "user") assert user_rec["permissionMode"] == "bypassPermissions" assert "isMeta" not in user_rec assert user_rec["version"] == "2.1.147" assert user_rec["gitBranch"] == "HEAD" assert "promptId" in user_rec def test_build_seed_jsonl_assistant_string_content_wraps_as_text_block() -> None: seed = build_seed_jsonl( [ {"role": "user", "content": "u"}, {"role": "assistant", "content": "Got it."}, ], session_id="s", cwd="/tmp", ) asst_recs = [r for r in _records(seed) if r.get("type") == "assistant"] assert len(asst_recs) == 1 msg = asst_recs[0]["message"] assert msg["content"] == [{"type": "text", "text": "Got it."}] assert msg["stop_reason"] == "end_turn" assert msg["diagnostics"] is None assert "server_tool_use" in msg["usage"] assert "cache_creation" in msg["usage"] def test_build_seed_jsonl_splits_assistant_blocks_into_separate_records() -> None: """A multi-block assistant message becomes one record per block, all sharing the same `msg_id` and `requestId`. parentUuid chains them.""" seed = build_seed_jsonl( [ {"role": "user", "content": "u"}, { "role": "assistant", "content": [ {"type": "thinking", "thinking": "ponder", "signature": "sig"}, {"type": "tool_use", "id": "tu1", "name": "bash", "input": {}}, ], }, ], session_id="s", cwd="/tmp", ) asst_recs = [r for r in _records(seed) if r.get("type") == "assistant"] assert len(asst_recs) == 2 assert asst_recs[0]["message"]["id"] == asst_recs[1]["message"]["id"] assert asst_recs[0]["requestId"] == asst_recs[1]["requestId"] assert asst_recs[1]["parentUuid"] == asst_recs[0]["uuid"] assert asst_recs[0]["message"]["content"][0]["type"] == "thinking" assert asst_recs[1]["message"]["content"][0]["type"] == "tool_use" assert asst_recs[0]["message"]["stop_reason"] == "tool_use" def test_build_seed_jsonl_tool_result_parents_to_matching_tool_use() -> None: """A user message with a `tool_result` block becomes a user record whose parentUuid points to the assistant record that emitted the matching `tool_use`. `sourceToolAssistantUUID` mirrors that link.""" seed = build_seed_jsonl( [ {"role": "user", "content": "u"}, { "role": "assistant", "content": [ {"type": "tool_use", "id": "tu1", "name": "bash", "input": {}} ], }, { "role": "user", "content": [ {"type": "tool_result", "tool_use_id": "tu1", "content": "ok"} ], }, ], session_id="s", cwd="/tmp", ) recs = _records(seed) asst = next(r for r in recs if r.get("type") == "assistant") # second user record is the tool_result one (first was the prompt) user_recs = [r for r in recs if r.get("type") == "user"] tool_result_rec = user_recs[1] assert tool_result_rec["parentUuid"] == asst["uuid"] assert tool_result_rec["sourceToolAssistantUUID"] == asst["uuid"] assert tool_result_rec["toolUseResult"] == "ok" assert tool_result_rec["message"]["content"] == [ {"type": "tool_result", "tool_use_id": "tu1", "content": "ok"} ] def test_build_seed_jsonl_splits_multi_tool_result_user_message() -> None: """When a user message carries multiple tool_result blocks (one per parallel tool_use), claude writes one record per result. Each parents on the corresponding assistant record.""" seed = build_seed_jsonl( [ {"role": "user", "content": "u"}, { "role": "assistant", "content": [ {"type": "tool_use", "id": "A", "name": "x", "input": {}} ], }, { "role": "assistant", "content": [ {"type": "tool_use", "id": "B", "name": "y", "input": {}} ], }, { "role": "user", "content": [ {"type": "tool_result", "tool_use_id": "A", "content": "a"}, {"type": "tool_result", "tool_use_id": "B", "content": "b"}, ], }, ], session_id="s", cwd="/tmp", ) recs = _records(seed) asst_recs = [r for r in recs if r.get("type") == "assistant"] tool_result_recs = [ r for r in recs if r.get("type") == "user" and isinstance(r["message"]["content"], list) and r["message"]["content"][0].get("type") == "tool_result" ] assert len(tool_result_recs) == 2 a_uuid = next( r["uuid"] for r in asst_recs if r["message"]["content"][0].get("id") == "A" ) b_uuid = next( r["uuid"] for r in asst_recs if r["message"]["content"][0].get("id") == "B" ) assert tool_result_recs[0]["sourceToolAssistantUUID"] == a_uuid assert tool_result_recs[1]["sourceToolAssistantUUID"] == b_uuid def test_build_seed_jsonl_chains_parent_uuids_linearly() -> None: """Every content-carrying record (user, assistant, tool_result) chains via parentUuid back through the record graph. The first user has parentUuid=None; subsequent records have non-null parents.""" seed = build_seed_jsonl( [ {"role": "user", "content": "u1"}, {"role": "assistant", "content": "a1"}, {"role": "user", "content": "u2"}, {"role": "assistant", "content": "a2"}, ], session_id="s", cwd="/tmp", ) chain = [ r for r in _records(seed) if r.get("type") in ("user", "assistant") ] assert chain[0]["parentUuid"] is None for prev, nxt in zip(chain, chain[1:], strict=False): assert nxt["parentUuid"] == prev["uuid"] def test_build_seed_jsonl_rejects_unknown_role() -> None: with pytest.raises(ValueError, match="role"): build_seed_jsonl( [{"role": "system", "content": "x"}], session_id="s", cwd="/tmp" ) # --- build_concat_prompt -------------------------------------------------- def test_build_concat_prompt_empty_history_returns_just_last_user() -> None: assert build_concat_prompt([], "hello") == "hello" def test_build_concat_prompt_renders_alternating_history() -> None: out = build_concat_prompt( [ {"role": "user", "content": "u1"}, {"role": "assistant", "content": "a1"}, {"role": "user", "content": "u2"}, {"role": "assistant", "content": "a2"}, ], "u3", ) assert "Previous conversation context:" in out assert "[User]: u1" in out assert "[Assistant]: a1" in out assert "[User]: u2" in out assert "[Assistant]: a2" in out assert "Continue from here. New user message: u3" in out # The new prompt must come after the history, not interleaved. assert out.index("[Assistant]: a2") < out.index("Continue from here") def test_build_concat_prompt_flattens_text_blocks_and_skips_tools() -> None: """Content-as-list with text blocks gets flattened; tool blocks are skipped (they don't round-trip through stdin in any useful form).""" out = build_concat_prompt( [ { "role": "assistant", "content": [ {"type": "text", "text": "hello"}, {"type": "tool_use", "id": "t1", "name": "x", "input": {}}, {"type": "text", "text": "world"}, ], }, ], "ping", ) assert "[Assistant]: hello world" in out