diff --git a/README.md b/README.md index 53612ee..037881f 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![Tests](https://github.com/simonw/claude-code-transcripts/workflows/Test/badge.svg)](https://github.com/simonw/claude-code-transcripts/actions?query=workflow%3ATest) [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/claude-code-transcripts/blob/main/LICENSE) -Convert Claude Code session files (JSON or JSONL) to clean, mobile-friendly HTML pages with pagination. +Convert Claude Code and Codex CLI session files (JSON or JSONL) to clean, mobile-friendly HTML pages with pagination. [Example transcript](https://static.simonwillison.net/static/2025/claude-code-microjs/index.html) produced using this tool. @@ -24,11 +24,15 @@ uvx claude-code-transcripts --help ## Usage -This tool converts Claude Code session files into browseable multi-page HTML transcripts. +This tool converts Claude Code and Codex CLI session files into browseable multi-page HTML transcripts. + +**Supported formats:** +- Claude Code session files (JSONL format from `~/.claude/projects`) +- Codex CLI session files (JSONL format from `~/.codex/sessions`) - automatically detected and converted There are four commands available: -- `local` (default) - select from local Claude Code sessions stored in `~/.claude/projects` +- `local` (default) - select from local sessions (Claude Code from `~/.claude/projects` and Codex CLI from `~/.codex/sessions`) - `web` - select from web sessions via the Claude API - `json` - convert a specific JSON or JSONL session file - `all` - convert all local sessions to a browsable HTML archive @@ -39,7 +43,7 @@ The quickest way to view a recent local session: claude-code-transcripts ``` -This shows an interactive picker to select a session, generates HTML, and opens it in your default browser. +This shows an interactive picker with sessions from both Claude Code and Codex CLI, clearly labeled by source. Select any session to generate HTML and open it in your browser. ### Output options diff --git a/src/claude_code_transcripts/__init__.py b/src/claude_code_transcripts/__init__.py index f2246a2..862a5e0 100644 --- a/src/claude_code_transcripts/__init__.py +++ b/src/claude_code_transcripts/__init__.py @@ -75,6 +75,20 @@ def extract_text_from_content(content): return "" +def _is_preamble_text(text): + if not isinstance(text, str): + return False + stripped = text.lstrip() + if not stripped: + return False + if stripped.startswith("<"): + return True + lowered = stripped.lower() + return lowered.startswith("# agents.md instructions") or lowered.startswith( + "agents.md instructions" + ) + + # Module-level variable for GitHub repo (set by generate_html) _github_repo = None @@ -103,7 +117,7 @@ def get_session_summary(filepath, max_length=200): msg = entry.get("message", {}) content = msg.get("content", "") text = extract_text_from_content(content) - if text: + if text and not _is_preamble_text(text): if len(text) > max_length: return text[: max_length - 3] + "..." return text @@ -139,6 +153,8 @@ def _get_jsonl_summary(filepath, max_length=200): continue try: obj = json.loads(line) + + # Claude Code format: {"type": "user", "message": {...}} if ( obj.get("type") == "user" and not obj.get("isMeta") @@ -146,10 +162,44 @@ def _get_jsonl_summary(filepath, max_length=200): ): content = obj["message"]["content"] text = extract_text_from_content(content) - if text and not text.startswith("<"): + if text and not _is_preamble_text(text): if len(text) > max_length: return text[: max_length - 3] + "..." return text + + # Codex CLI format: {"type": "response_item", "payload": {"type": "message", "role": "user", "content": [...]}} + elif obj.get("type") == "response_item": + payload = obj.get("payload", {}) + if ( + payload.get("type") == "message" + and payload.get("role") == "user" + and payload.get("content") + ): + content_blocks = payload["content"] + # Extract text from Codex CLI content blocks + if isinstance(content_blocks, list): + for block in content_blocks: + if block.get("type") == "input_text": + text = block.get("text", "") + if text and not _is_preamble_text(text): + if len(text) > max_length: + return text[: max_length - 3] + "..." + return text + # Codex CLI old format: {"type": "message", "role": "user", "content": [...]} + elif ( + obj.get("type") == "message" + and obj.get("role") == "user" + and obj.get("content") + ): + content_blocks = obj.get("content", []) + if isinstance(content_blocks, list): + for block in content_blocks: + if block.get("type") in ("input_text", "text"): + text = block.get("text", "") + if text and not _is_preamble_text(text): + if len(text) > max_length: + return text[: max_length - 3] + "..." + return text except json.JSONDecodeError: continue except Exception: @@ -183,6 +233,53 @@ def find_local_sessions(folder, limit=10): return results[:limit] +def find_combined_sessions(claude_dir=None, codex_dir=None, limit=10): + """Find recent sessions from both Claude Code and Codex CLI directories. + + Args: + claude_dir: Path to Claude Code projects folder (default: ~/.claude/projects) + codex_dir: Path to Codex CLI sessions folder (default: ~/.codex/sessions) + limit: Maximum number of sessions to return (default: 10) + + Returns: + List of (Path, summary, source) tuples sorted by modification time (newest first). + source is either "Claude" or "Codex". + """ + if claude_dir is None: + claude_dir = Path.home() / ".claude" / "projects" + if codex_dir is None: + codex_dir = Path.home() / ".codex" / "sessions" + + claude_dir = Path(claude_dir) + codex_dir = Path(codex_dir) + + results = [] + + # Find Claude Code sessions + if claude_dir.exists(): + for f in claude_dir.glob("**/*.jsonl"): + if f.name.startswith("agent-"): + continue + summary = get_session_summary(f) + if summary.lower() == "warmup" or summary == "(no summary)": + continue + results.append((f, summary, "Claude")) + + # Find Codex CLI sessions + if codex_dir.exists(): + for f in codex_dir.glob("**/*.jsonl"): + if f.name.startswith("agent-"): + continue + summary = get_session_summary(f) + if summary.lower() == "warmup" or summary == "(no summary)": + continue + results.append((f, summary, "Codex")) + + # Sort by modification time, most recent first + results.sort(key=lambda x: x[0].stat().st_mtime, reverse=True) + return results[:limit] + + def get_project_display_name(folder_name): """Convert encoded folder name to readable project name. @@ -464,8 +561,241 @@ def parse_session_file(filepath): return json.load(f) +def _is_codex_cli_format(filepath): + """Detect if a JSONL file is in Codex CLI format. + + Checks the first few lines for Codex CLI markers like session_meta or response_item. + """ + try: + saw_claude_message = False + with open(filepath, "r", encoding="utf-8") as f: + for idx, line in enumerate(f): + if idx >= 25: # Check the first 25 lines + break + line = line.strip() + if not line: + continue + try: + obj = json.loads(line) + entry_type = obj.get("type") + # Codex CLI markers (new and old formats) + if entry_type in ( + "session_meta", + "response_item", + "turn_context", + "event_msg", + ): + return True + if "record_type" in obj: + return True + if entry_type == "message" and obj.get("role") in ( + "user", + "assistant", + ): + return True + if entry_type in ( + "function_call", + "function_call_output", + "reasoning", + ): + return True + # Claude Code has "type" as user/assistant + if entry_type in ("user", "assistant"): + saw_claude_message = True + except json.JSONDecodeError: + continue + if saw_claude_message: + return False + except Exception: + pass + return False + + +def _map_codex_tool_to_claude(tool_name): + """Map Codex CLI tool names to Claude Code tool names.""" + mapping = { + "shell_command": "Bash", + "read_file": "Read", + "write_file": "Write", + "edit_file": "Edit", + "search_files": "Grep", + "list_files": "Glob", + } + return mapping.get(tool_name, tool_name) + + +def _convert_codex_content_to_claude(content_blocks): + """Convert Codex CLI content blocks to Claude Code format. + + Args: + content_blocks: List of Codex content blocks like [{"type": "input_text", "text": "..."}] + + Returns: + Either a string (for simple text) or list of Claude Code content blocks + """ + if not content_blocks: + return [] + + # If there's only one input_text block, return as simple string + if len(content_blocks) == 1 and content_blocks[0].get("type") == "input_text": + return content_blocks[0].get("text", "") + + # Otherwise convert to Claude Code format + claude_blocks = [] + for block in content_blocks: + block_type = block.get("type") + if block_type == "input_text": + claude_blocks.append({"type": "text", "text": block.get("text", "")}) + elif block_type == "output_text": + claude_blocks.append({"type": "text", "text": block.get("text", "")}) + elif block_type == "text": + # Already in Claude format + claude_blocks.append(block) + else: + # Pass through other types + claude_blocks.append(block) + + return claude_blocks + + +def _parse_codex_jsonl_file(filepath): + """Parse Codex CLI JSONL file and convert to Claude Code format.""" + loglines = [] + + def add_message(role, content, timestamp): + if role not in ("user", "assistant"): + return + converted_content = _convert_codex_content_to_claude(content) + loglines.append( + { + "type": role, + "timestamp": timestamp, + "message": {"role": role, "content": converted_content}, + } + ) + + def add_tool_use(tool_name, arguments, call_id, timestamp): + if isinstance(arguments, str): + try: + tool_input = json.loads(arguments) + except json.JSONDecodeError: + tool_input = {} + elif isinstance(arguments, dict): + tool_input = arguments + else: + tool_input = {} + + claude_tool_name = _map_codex_tool_to_claude(tool_name) + loglines.append( + { + "type": "assistant", + "timestamp": timestamp, + "message": { + "role": "assistant", + "content": [ + { + "type": "tool_use", + "id": call_id, + "name": claude_tool_name, + "input": tool_input, + } + ], + }, + } + ) + + def add_tool_result(call_id, output, timestamp, is_error=False): + loglines.append( + { + "type": "user", + "timestamp": timestamp, + "message": { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": call_id, + "content": output, + "is_error": is_error, + } + ], + }, + } + ) + + with open(filepath, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if not line: + continue + try: + obj = json.loads(line) + record_type = obj.get("type") + timestamp = obj.get("timestamp", "") + + if record_type == "response_item": + payload = obj.get("payload", {}) + payload_type = payload.get("type") + + if payload_type == "message": + add_message( + payload.get("role"), + payload.get("content", []), + timestamp, + ) + elif payload_type == "function_call": + add_tool_use( + payload.get("name", ""), + payload.get("arguments", "{}"), + payload.get("call_id", ""), + timestamp, + ) + elif payload_type == "function_call_output": + add_tool_result( + payload.get("call_id", ""), + payload.get("output", ""), + timestamp, + bool(payload.get("is_error")), + ) + elif record_type == "message": + add_message( + obj.get("role"), + obj.get("content", []), + timestamp, + ) + elif record_type == "function_call": + call_id = obj.get("call_id") or obj.get("id", "") + add_tool_use( + obj.get("name", ""), + obj.get("arguments", "{}"), + call_id, + timestamp, + ) + elif record_type == "function_call_output": + call_id = obj.get("call_id") or obj.get("id", "") + add_tool_result( + call_id, + obj.get("output", ""), + timestamp, + bool(obj.get("is_error")), + ) + + except json.JSONDecodeError: + continue + + return {"loglines": loglines} + + def _parse_jsonl_file(filepath): - """Parse JSONL file and convert to standard format.""" + """Parse JSONL file and convert to standard format. + + Automatically detects and handles both Claude Code and Codex CLI formats. + """ + # Detect format + if _is_codex_cli_format(filepath): + return _parse_codex_jsonl_file(filepath) + + # Original Claude Code format parsing loglines = [] with open(filepath, "r", encoding="utf-8") as f: @@ -1423,16 +1753,19 @@ def cli(): help="Maximum number of sessions to show (default: 10)", ) def local_cmd(output, output_auto, repo, gist, include_json, open_browser, limit): - """Select and convert a local Claude Code session to HTML.""" + """Select and convert a local Claude Code or Codex CLI session to HTML.""" projects_folder = Path.home() / ".claude" / "projects" + codex_folder = Path.home() / ".codex" / "sessions" - if not projects_folder.exists(): - click.echo(f"Projects folder not found: {projects_folder}") - click.echo("No local Claude Code sessions available.") + # Check if at least one directory exists + if not projects_folder.exists() and not codex_folder.exists(): + click.echo(f"Neither Claude Code nor Codex CLI sessions found.") + click.echo(f" - Claude Code: {projects_folder}") + click.echo(f" - Codex CLI: {codex_folder}") return click.echo("Loading local sessions...") - results = find_local_sessions(projects_folder, limit=limit) + results = find_combined_sessions(limit=limit) if not results: click.echo("No local sessions found.") @@ -1440,15 +1773,16 @@ def local_cmd(output, output_auto, repo, gist, include_json, open_browser, limit # Build choices for questionary choices = [] - for filepath, summary in results: + for filepath, summary, source in results: stat = filepath.stat() mod_time = datetime.fromtimestamp(stat.st_mtime) size_kb = stat.st_size / 1024 date_str = mod_time.strftime("%Y-%m-%d %H:%M") # Truncate summary if too long - if len(summary) > 50: - summary = summary[:47] + "..." - display = f"{date_str} {size_kb:5.0f} KB {summary}" + if len(summary) > 45: + summary = summary[:42] + "..." + # Add source label + display = f"{date_str} {size_kb:5.0f} KB [{source:6s}] {summary}" choices.append(questionary.Choice(title=display, value=filepath)) selected = questionary.select( diff --git a/tests/sample_codex_session.jsonl b/tests/sample_codex_session.jsonl new file mode 100644 index 0000000..dae88ec --- /dev/null +++ b/tests/sample_codex_session.jsonl @@ -0,0 +1,3 @@ +{"timestamp":"2025-01-01T00:00:00.000Z","type":"session_meta","payload":{"id":"test-session","timestamp":"2025-01-01T00:00:00.000Z","cwd":"/project","originator":"codex_cli_rs","cli_version":"0.0.0","instructions":"Sample instructions","source":"cli","model_provider":"openai"}} +{"timestamp":"2025-01-01T00:00:01.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"Write a hello world function"}]}} +{"timestamp":"2025-01-01T00:00:02.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"text","text":"Here is a simple hello world function."}]}} diff --git a/tests/test_codex_cli_finder.py b/tests/test_codex_cli_finder.py new file mode 100644 index 0000000..8dd233d --- /dev/null +++ b/tests/test_codex_cli_finder.py @@ -0,0 +1,170 @@ +"""Tests for finding sessions from both Claude Code and Codex CLI directories.""" + +import tempfile +from pathlib import Path +import time + +import pytest + +from claude_code_transcripts import find_local_sessions, find_combined_sessions + + +class TestFindCombinedSessions: + """Tests for finding sessions from both ~/.claude/projects and ~/.codex/sessions.""" + + def test_finds_sessions_from_both_directories(self): + """Test that sessions from both Claude and Codex directories are found.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + + # Create mock Claude projects directory + claude_dir = tmpdir / "claude_projects" / "project-a" + claude_dir.mkdir(parents=True) + claude_session = claude_dir / "session1.jsonl" + claude_session.write_text( + '{"type": "user", "timestamp": "2025-01-01T10:00:00.000Z", "message": {"role": "user", "content": "Claude session"}}\n' + ) + + # Create mock Codex sessions directory + codex_dir = tmpdir / "codex_sessions" + codex_dir.mkdir(parents=True) + codex_session = codex_dir / "rollout-2025-12-28T10-00-00-abc123.jsonl" + codex_session.write_text( + '{"timestamp":"2025-12-28T10:00:00.000Z","type":"session_meta","payload":{"id":"abc123"}}\n' + '{"timestamp":"2025-12-28T10:00:00.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"Codex session"}]}}\n' + ) + + # Find sessions from both + results = find_combined_sessions( + claude_dir=tmpdir / "claude_projects", codex_dir=codex_dir + ) + + # Should find both + assert len(results) == 2 + paths = [r[0] for r in results] + assert claude_session in paths + assert codex_session in paths + + def test_labels_sessions_by_source(self): + """Test that sessions include source labels (Claude or Codex).""" + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + + # Create one of each type + claude_dir = tmpdir / "claude_projects" / "project-a" + claude_dir.mkdir(parents=True) + claude_session = claude_dir / "session1.jsonl" + claude_session.write_text( + '{"type": "user", "timestamp": "2025-01-01T10:00:00.000Z", "message": {"role": "user", "content": "Test"}}\n' + ) + + codex_dir = tmpdir / "codex_sessions" + codex_dir.mkdir(parents=True) + codex_session = codex_dir / "rollout-2025-12-28T10-00-00-abc123.jsonl" + codex_session.write_text( + '{"timestamp":"2025-12-28T10:00:00.000Z","type":"session_meta","payload":{"id":"abc123"}}\n' + '{"timestamp":"2025-12-28T10:00:00.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"Test"}]}}\n' + ) + + results = find_combined_sessions( + claude_dir=tmpdir / "claude_projects", codex_dir=codex_dir + ) + + # Results should be (Path, summary, source) tuples + assert len(results) == 2 + + claude_result = next(r for r in results if r[0] == claude_session) + codex_result = next(r for r in results if r[0] == codex_session) + + # Check source labels + assert claude_result[2] == "Claude" + assert codex_result[2] == "Codex" + + def test_sorts_combined_by_modification_time(self): + """Test that all sessions are sorted together by modification time.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + + # Create older Claude session + claude_dir = tmpdir / "claude_projects" / "project-a" + claude_dir.mkdir(parents=True) + old_claude = claude_dir / "old.jsonl" + old_claude.write_text( + '{"type": "user", "timestamp": "2025-01-01T10:00:00.000Z", "message": {"role": "user", "content": "Old"}}\n' + ) + + time.sleep(0.1) + + # Create newer Codex session + codex_dir = tmpdir / "codex_sessions" + codex_dir.mkdir(parents=True) + new_codex = codex_dir / "rollout-2025-12-28T10-00-00-abc123.jsonl" + new_codex.write_text( + '{"timestamp":"2025-12-28T10:00:00.000Z","type":"session_meta","payload":{"id":"abc123"}}\n' + '{"timestamp":"2025-12-28T10:00:00.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"New"}]}}\n' + ) + + results = find_combined_sessions( + claude_dir=tmpdir / "claude_projects", codex_dir=codex_dir + ) + + # Newer file should be first regardless of source + assert results[0][0] == new_codex + assert results[1][0] == old_claude + + def test_respects_limit_across_both_sources(self): + """Test that limit applies to combined results.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + + # Create 3 Claude sessions + claude_dir = tmpdir / "claude_projects" / "project-a" + claude_dir.mkdir(parents=True) + for i in range(3): + f = claude_dir / f"session{i}.jsonl" + f.write_text( + '{"type": "user", "timestamp": "2025-01-01T10:00:00.000Z", "message": {"role": "user", "content": "Test"}}\n' + ) + + # Create 3 Codex sessions + codex_dir = tmpdir / "codex_sessions" + codex_dir.mkdir(parents=True) + for i in range(3): + f = codex_dir / f"rollout-2025-12-28T10-00-0{i}-test{i}.jsonl" + f.write_text( + f'{{"timestamp":"2025-12-28T10:00:0{i}.000Z","type":"session_meta","payload":{{"id":"test{i}"}}}}\n' + f'{{"timestamp":"2025-12-28T10:00:0{i}.000Z","type":"response_item","payload":{{"type":"message","role":"user","content":[{{"type":"input_text","text":"Test"}}]}}}}\n' + ) + + # Request only 4 total + results = find_combined_sessions( + claude_dir=tmpdir / "claude_projects", codex_dir=codex_dir, limit=4 + ) + + assert len(results) == 4 + + def test_handles_missing_directories(self): + """Test that missing directories don't cause errors.""" + # Both missing + results = find_combined_sessions( + claude_dir=Path("/nonexistent/claude"), + codex_dir=Path("/nonexistent/codex"), + ) + assert results == [] + + # Only Claude exists + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + claude_dir = tmpdir / "claude_projects" / "project-a" + claude_dir.mkdir(parents=True) + session = claude_dir / "session1.jsonl" + session.write_text( + '{"type": "user", "timestamp": "2025-01-01T10:00:00.000Z", "message": {"role": "user", "content": "Test"}}\n' + ) + + results = find_combined_sessions( + claude_dir=tmpdir / "claude_projects", + codex_dir=Path("/nonexistent/codex"), + ) + assert len(results) == 1 + assert results[0][2] == "Claude" diff --git a/tests/test_codex_format.py b/tests/test_codex_format.py new file mode 100644 index 0000000..2fadc4b --- /dev/null +++ b/tests/test_codex_format.py @@ -0,0 +1,237 @@ +"""Tests for Codex CLI format support.""" + +import tempfile +from pathlib import Path + +import pytest + +from claude_code_transcripts import parse_session_file, generate_html + + +class TestCodexCliFormatDetection: + """Tests for detecting Codex CLI format.""" + + def test_detects_codex_format_from_session_meta(self): + """Test that Codex format is detected from session_meta record type.""" + # Create a minimal Codex CLI JSONL file + with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f: + f.write( + '{"timestamp":"2025-12-28T12:18:30.533Z","type":"session_meta","payload":{"id":"test-id","timestamp":"2025-12-28T12:18:30.522Z","cwd":"/test","originator":"codex_cli_rs","cli_version":"0.77.0"}}\n' + ) + f.write( + '{"timestamp":"2025-12-28T12:18:30.533Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"Hello world"}]}}\n' + ) + temp_file = Path(f.name) + + try: + data = parse_session_file(temp_file) + # Should have loglines key after parsing + assert "loglines" in data + # Should have at least one entry + assert len(data["loglines"]) >= 1 + finally: + temp_file.unlink() + + def test_detects_claude_code_format(self): + """Test that Claude Code format still works.""" + # Create a minimal Claude Code JSONL file + with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f: + f.write( + '{"type": "user", "timestamp": "2025-01-01T10:00:00.000Z", "message": {"role": "user", "content": "Hello"}}\n' + ) + f.write( + '{"type": "assistant", "timestamp": "2025-01-01T10:00:05.000Z", "message": {"role": "assistant", "content": [{"type": "text", "text": "Hi!"}]}}\n' + ) + temp_file = Path(f.name) + + try: + data = parse_session_file(temp_file) + assert "loglines" in data + assert len(data["loglines"]) == 2 + finally: + temp_file.unlink() + + def test_detects_codex_format_from_message_record(self): + """Test that Codex format is detected from message/record_type records.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f: + f.write( + '{"id":"test-id","timestamp":"2025-08-31T20:48:31.616Z","instructions":null}\n' + ) + f.write('{"record_type":"state"}\n') + f.write( + '{"type":"message","id":null,"role":"user","content":[{"type":"input_text","text":"Hello old format"}]}\n' + ) + temp_file = Path(f.name) + + try: + data = parse_session_file(temp_file) + loglines = data["loglines"] + assert len(loglines) == 1 + assert loglines[0]["type"] == "user" + assert loglines[0]["message"]["content"] == "Hello old format" + finally: + temp_file.unlink() + + +class TestCodexCliMessageParsing: + """Tests for parsing Codex CLI messages.""" + + def test_parses_user_message(self): + """Test that Codex user messages are converted to Claude Code format.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f: + f.write( + '{"timestamp":"2025-12-28T12:18:30.533Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"Test message"}]}}\n' + ) + temp_file = Path(f.name) + + try: + data = parse_session_file(temp_file) + loglines = data["loglines"] + assert len(loglines) == 1 + + # Check conversion to Claude Code format + entry = loglines[0] + assert entry["type"] == "user" + assert entry["timestamp"] == "2025-12-28T12:18:30.533Z" + assert "message" in entry + assert entry["message"]["role"] == "user" + # Content should be extracted from input_text + content = entry["message"]["content"] + assert content == "Test message" + finally: + temp_file.unlink() + + def test_parses_assistant_message(self): + """Test that Codex assistant messages are converted correctly.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f: + f.write( + '{"timestamp":"2025-12-28T12:18:40.000Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"text","text":"Response text"}]}}\n' + ) + temp_file = Path(f.name) + + try: + data = parse_session_file(temp_file) + loglines = data["loglines"] + assert len(loglines) == 1 + + entry = loglines[0] + assert entry["type"] == "assistant" + assert entry["message"]["role"] == "assistant" + # Content should be in Claude Code format + assert isinstance(entry["message"]["content"], list) + assert entry["message"]["content"][0]["type"] == "text" + assert entry["message"]["content"][0]["text"] == "Response text" + finally: + temp_file.unlink() + + def test_skips_non_message_records(self): + """Test that non-message records (session_meta, turn_context, etc.) are skipped.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f: + f.write( + '{"timestamp":"2025-12-28T12:18:30.533Z","type":"session_meta","payload":{"id":"test"}}\n' + ) + f.write( + '{"timestamp":"2025-12-28T12:18:30.533Z","type":"turn_context","payload":{}}\n' + ) + f.write( + '{"timestamp":"2025-12-28T12:18:30.533Z","type":"event_msg","payload":{}}\n' + ) + f.write( + '{"timestamp":"2025-12-28T12:18:30.533Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"Only this"}]}}\n' + ) + temp_file = Path(f.name) + + try: + data = parse_session_file(temp_file) + loglines = data["loglines"] + # Should only have the one message + assert len(loglines) == 1 + assert loglines[0]["message"]["content"] == "Only this" + finally: + temp_file.unlink() + + +class TestCodexCliToolCalls: + """Tests for parsing Codex CLI tool calls.""" + + def test_parses_function_call(self): + """Test that Codex function_call is converted to tool_use.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f: + # Add a function call + f.write( + '{"timestamp":"2025-12-28T12:18:40.000Z","type":"response_item","payload":{"type":"function_call","name":"shell_command","arguments":"{\\"command\\":\\"ls -la\\"}","call_id":"call_123"}}\n' + ) + temp_file = Path(f.name) + + try: + data = parse_session_file(temp_file) + loglines = data["loglines"] + assert len(loglines) == 1 + + entry = loglines[0] + assert entry["type"] == "assistant" + assert isinstance(entry["message"]["content"], list) + + # Check tool_use block + tool_use = entry["message"]["content"][0] + assert tool_use["type"] == "tool_use" + assert tool_use["name"] == "Bash" # shell_command -> Bash + assert "input" in tool_use + assert tool_use["input"]["command"] == "ls -la" + finally: + temp_file.unlink() + + def test_parses_function_call_output_old_format(self): + """Test that Codex function_call_output converts to tool_result.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f: + f.write( + '{"id":"test-id","timestamp":"2025-08-31T20:48:31.616Z","instructions":null}\n' + ) + f.write('{"record_type":"state"}\n') + f.write( + '{"type":"function_call","id":"fc_123","name":"shell_command","arguments":"{\\"command\\":\\"ls -la\\"}","call_id":"call_123"}\n' + ) + f.write( + '{"type":"function_call_output","call_id":"call_123","output":"OK"}\n' + ) + temp_file = Path(f.name) + + try: + data = parse_session_file(temp_file) + loglines = data["loglines"] + assert len(loglines) == 2 + + tool_use = loglines[0]["message"]["content"][0] + assert tool_use["type"] == "tool_use" + assert tool_use["name"] == "Bash" + assert tool_use["id"] == "call_123" + + tool_result = loglines[1]["message"]["content"][0] + assert tool_result["type"] == "tool_result" + assert tool_result["tool_use_id"] == "call_123" + assert tool_result["content"] == "OK" + finally: + temp_file.unlink() + + +class TestCodexCliHtmlGeneration: + """Integration test for generating HTML from Codex CLI files.""" + + def test_generates_html_from_codex_file(self): + """Test that HTML can be generated from a Codex CLI session.""" + # Use the sample codex session file + sample_file = Path(__file__).parent / "sample_codex_session.jsonl" + if not sample_file.exists(): + pytest.skip("sample_codex_session.jsonl not found") + + with tempfile.TemporaryDirectory() as tmpdir: + output_dir = Path(tmpdir) + + # Should not raise an exception + generate_html(sample_file, output_dir) + + # Check that HTML was generated + assert (output_dir / "index.html").exists() + # Should have at least one page + pages = list(output_dir.glob("page-*.html")) + assert len(pages) >= 1 diff --git a/tests/test_generate_html.py b/tests/test_generate_html.py index 32120c5..32df201 100644 --- a/tests/test_generate_html.py +++ b/tests/test_generate_html.py @@ -1113,6 +1113,16 @@ def test_truncates_long_summaries(self, tmp_path): assert len(summary) <= 100 assert summary.endswith("...") + def test_skips_agents_preamble_in_codex_messages(self, tmp_path): + """Test skipping AGENTS.md preamble in Codex CLI messages.""" + jsonl_file = tmp_path / "codex.jsonl" + jsonl_file.write_text( + '{"type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"# AGENTS.md instructions for /Users/test/repo"}]}}\n' + '{"type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"Real question"}]}}\n' + ) + summary = get_session_summary(jsonl_file) + assert summary == "Real question" + class TestFindLocalSessions: """Tests for find_local_sessions which discovers local JSONL files."""