diff --git a/src/lib.rs b/src/lib.rs index 8e3c9ff..9ca3cd8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,7 +4,7 @@ pub mod cli; // pub mod commands; // pub mod db; pub mod error; -// pub mod models; +pub mod models; // pub mod output; pub mod util; diff --git a/src/models/mod.rs b/src/models/mod.rs new file mode 100644 index 0000000..721e2c2 --- /dev/null +++ b/src/models/mod.rs @@ -0,0 +1,3 @@ +//! Data models for Claude Code session JSONL files. +pub mod session; +pub mod stats; diff --git a/src/models/session.rs b/src/models/session.rs new file mode 100644 index 0000000..cdce051 --- /dev/null +++ b/src/models/session.rs @@ -0,0 +1,253 @@ +//! Rust types for deserializing Claude Code JSONL session files. +//! +//! These types are intentionally permissive — real session files contain many +//! undocumented fields, so `RawEntry` uses `#[serde(flatten)]` to capture +//! unknown fields instead of rejecting or silently dropping them. + +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +/// Raw deserialized line from a JSONL session file. +/// +/// Uses permissive deserialization — NO `deny_unknown_fields`. +/// All fields are optional because different entry types have different fields. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RawEntry { + /// Entry type: `"user"`, `"assistant"`, `"system"`, `"progress"`, + /// `"file-history-snapshot"`, etc. + #[serde(rename = "type")] + pub entry_type: Option, + /// UUID of this session. NOTE(review): no `rename`/`rename_all` is applied, so only the exact key `session_id` is matched and a camelCase `sessionId` would land in `extra` — confirm the key casing used in real files. + pub session_id: Option, + /// UUID of the parent agent, if this is a sub-agent run. + pub parent_session_id: Option, + /// The message payload (present on user/assistant entries). + pub message: Option, + /// System message payload. + pub system_message: Option, + /// Current working directory (present on first system entry). + pub cwd: Option, + /// Timestamp string (ISO 8601). + pub timestamp: Option, + /// Duration of the conversation turn in milliseconds (summed by `compute_stats` for `system` entries). 
+ pub duration_ms: Option, + /// Raw JSON for any fields this struct doesn't explicitly model. + #[serde(flatten)] + pub extra: std::collections::HashMap, +} + +/// A message within a session entry (user or assistant role). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Message { + /// Role: `"user"` or `"assistant"`. + pub role: Option, + /// Content: either a plain string or an array of content blocks. + pub content: Option, + /// Token usage statistics (present on assistant messages). + pub usage: Option, + /// Model identifier (e.g. `"claude-opus-4-5-20251001"`). + pub model: Option, + /// Stop reason from the API (e.g. `"end_turn"`, `"tool_use"`). + pub stop_reason: Option, +} + +/// Message content: either a plain text string or an array of typed blocks. +/// +/// Untagged: tries `Blocks` first, falls back to `Text`. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum MessageContent { + /// Array of structured content blocks. + Blocks(Vec), + /// Plain text string content. + Text(String), +} + +/// A single content block within a message, internally tagged on `type` with snake_case variant names (e.g. `tool_use`, `tool_result`). +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ContentBlock { + /// Plain text output. + Text { + /// The text content. + text: String, + }, + /// Extended thinking block. + Thinking { + /// The thinking content. + thinking: String, + }, + /// A tool invocation. + ToolUse { + /// Unique ID for this tool call. + id: String, + /// Tool name (e.g. `"Bash"`, `"Read"`, `"Write"`). + name: String, + /// Tool call arguments as raw JSON. + input: Value, + }, + /// Result of a tool invocation. + ToolResult { + /// The `tool_use` id this result corresponds to. + tool_use_id: String, + /// Result content (string or nested blocks). + content: Option, + /// `true` if the tool returned an error. + is_error: Option, + }, + /// An image block. + Image { + /// Image source details. 
+ source: Option, + }, + /// Catch-all for unknown block types encountered in real data. + #[serde(other)] + Unknown, +} + +/// Token usage for an assistant message. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct Usage { + /// Tokens in the input prompt. + pub input_tokens: Option, + /// Tokens in the generated output. + pub output_tokens: Option, + /// Tokens read from the cache. + pub cache_read_input_tokens: Option, + /// Tokens written to the cache. + pub cache_creation_input_tokens: Option, +} + +/// System-level message metadata. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SystemMessage { + /// System message type (e.g. `"turn_duration"`). + #[serde(rename = "type")] + pub msg_type: Option, + /// Duration in milliseconds (for `turn_duration` entries). NOTE(review): `compute_stats` sums `RawEntry::duration_ms` and does not read this field. + pub duration_ms: Option, +} + +#[cfg(test)] +mod tests { + use super::*; + + /// An assistant message with `text` and `thinking` blocks deserializes correctly. + #[test] + fn parse_assistant_text_and_thinking() { + let json = r#"{ + "type": "assistant", + "session_id": "abc123", + "message": { + "role": "assistant", + "content": [ + {"type": "thinking", "thinking": "Let me think about this."}, + {"type": "text", "text": "Here is my answer."} + ], + "usage": { + "input_tokens": 100, + "output_tokens": 50 + }, + "model": "claude-opus-4-5" + } + }"#; + + let entry: RawEntry = serde_json::from_str(json).expect("should parse"); + assert_eq!(entry.entry_type.as_deref(), Some("assistant")); + let msg = entry.message.expect("should have message"); + assert_eq!(msg.role.as_deref(), Some("assistant")); + assert_eq!(msg.model.as_deref(), Some("claude-opus-4-5")); + + let usage = msg.usage.expect("should have usage"); + assert_eq!(usage.input_tokens, Some(100)); + assert_eq!(usage.output_tokens, Some(50)); + + if let Some(MessageContent::Blocks(blocks)) = msg.content { + assert_eq!(blocks.len(), 2); + assert!(matches!(blocks[0], ContentBlock::Thinking { .. 
+ })); + assert!(matches!(blocks[1], ContentBlock::Text { .. })); + } else { + panic!("expected Blocks content"); + } + } + + /// A user message with a `tool_result` block deserializes correctly. + #[test] + fn parse_user_tool_result() { + let json = r#"{ + "type": "user", + "session_id": "def456", + "message": { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "toolu_01", + "content": "command output here", + "is_error": false + } + ] + } + }"#; + + let entry: RawEntry = serde_json::from_str(json).expect("should parse"); + let msg = entry.message.expect("should have message"); + if let Some(MessageContent::Blocks(blocks)) = msg.content { + assert_eq!(blocks.len(), 1); + if let ContentBlock::ToolResult { + tool_use_id, + is_error, + .. + } = &blocks[0] + { + assert_eq!(tool_use_id, "toolu_01"); + assert_eq!(*is_error, Some(false)); + } else { + panic!("expected ToolResult block"); + } + } else { + panic!("expected Blocks content"); + } + } + + /// A `RawEntry` with unknown fields deserializes without error. + #[test] + fn raw_entry_unknown_fields_ok() { + let json = r#"{ + "type": "progress", + "session_id": "ghi789", + "permissionMode": "auto", + "agentId": "some-agent-id", + "planContent": {"steps": []}, + "unknownFutureField": true + }"#; + + let entry: RawEntry = + serde_json::from_str(json).expect("should parse despite unknown fields"); + assert_eq!(entry.entry_type.as_deref(), Some("progress")); + // Extra fields captured in the flatten map + assert!(entry.extra.contains_key("permissionMode")); + assert!(entry.extra.contains_key("agentId")); + } + + /// A message with `content` as a plain string produces `MessageContent::Text`. + #[test] + fn parse_message_content_as_string() { + let json = r#"{ + "type": "user", + "session_id": "jkl012", + "message": { + "role": "user", + "content": "Hello, Claude!" 
+ } + }"#; + + let entry: RawEntry = serde_json::from_str(json).expect("should parse"); + let msg = entry.message.expect("should have message"); + if let Some(MessageContent::Text(text)) = msg.content { + assert_eq!(text, "Hello, Claude!"); + } else { + panic!("expected Text content"); + } + } +} diff --git a/src/models/stats.rs b/src/models/stats.rs new file mode 100644 index 0000000..9e80e5b --- /dev/null +++ b/src/models/stats.rs @@ -0,0 +1,181 @@ +//! Session statistics computed from raw JSONL entries. + +use std::collections::HashMap; + +use crate::models::session::{ContentBlock, MessageContent, RawEntry}; + +/// Aggregated statistics for a session. +#[derive(Debug, Clone, Default)] +pub struct SessionStats { + /// Total input tokens across all assistant messages. + pub input_tokens: u64, + /// Total output tokens across all assistant messages. + pub output_tokens: u64, + /// Total cache-read tokens across all assistant messages. + pub cache_read_tokens: u64, + /// Total cache-creation tokens across all assistant messages. + pub cache_creation_tokens: u64, + /// Tool call counts by tool name. + pub tool_calls: HashMap, + /// Total duration in milliseconds (sum of the top-level `duration_ms` on `system` entries). + pub duration_ms: u64, + /// Model identifier from the first assistant message that has one. + pub model: Option, + /// Total number of `user` entries that carry a `message`. + pub user_message_count: u64, + /// Total number of `assistant` entries that carry a `message`. + pub assistant_message_count: u64, +} + +/// Compute session statistics from a slice of raw entries: token totals, tool-call counts and the model come from `assistant` entries, message counts from `user`/`assistant` entries that carry a `message`, and duration from `system` entries; every other entry type is ignored. +pub fn compute_stats(entries: &[RawEntry]) -> SessionStats { + let mut stats = SessionStats::default(); + for entry in entries { + match entry.entry_type.as_deref() { + Some("assistant") => { + if let Some(msg) = &entry.message { + stats.assistant_message_count += 1; + // Accumulate usage tokens. 
+ if let Some(usage) = &msg.usage { + stats.input_tokens += usage.input_tokens.unwrap_or(0); + stats.output_tokens += usage.output_tokens.unwrap_or(0); + stats.cache_read_tokens += usage.cache_read_input_tokens.unwrap_or(0); + stats.cache_creation_tokens += + usage.cache_creation_input_tokens.unwrap_or(0); + } + // Capture model from first assistant message that has one. + if stats.model.is_none() { + stats.model = msg.model.clone(); + } + // Count tool_use blocks. + if let Some(MessageContent::Blocks(blocks)) = &msg.content { + for block in blocks { + if let ContentBlock::ToolUse { name, .. } = block { + *stats.tool_calls.entry(name.clone()).or_insert(0) += 1; + } + } + } + } + } + Some("user") => { + if entry.message.is_some() { + stats.user_message_count += 1; + } + } + Some("system") => { + // Accumulate the top-level `duration_ms` of every `system` entry. NOTE(review): there is no check for a `turn_duration` type and `system_message.duration_ms` is not read — confirm intent. + if let Some(dur) = entry.duration_ms { + stats.duration_ms += dur; + } + } + _ => {} + } + } + stats +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::models::session::{Message, MessageContent, RawEntry, Usage}; + + fn make_entry(entry_type: &str) -> RawEntry { + RawEntry { + entry_type: Some(entry_type.to_string()), + session_id: None, + parent_session_id: None, + message: None, + system_message: None, + cwd: None, + timestamp: None, + duration_ms: None, + extra: Default::default(), + } + } + + /// Empty entry slice produces all-zero stats with no model. + #[test] + fn empty_entries_all_zeros() { + let stats = compute_stats(&[]); + assert_eq!(stats.input_tokens, 0); + assert_eq!(stats.output_tokens, 0); + assert_eq!(stats.cache_read_tokens, 0); + assert_eq!(stats.cache_creation_tokens, 0); + assert_eq!(stats.duration_ms, 0); + assert_eq!(stats.user_message_count, 0); + assert_eq!(stats.assistant_message_count, 0); + assert!(stats.model.is_none()); + assert!(stats.tool_calls.is_empty()); + } + + /// A single assistant entry with usage produces correct token counts. 
+ #[test] + fn single_assistant_usage() { + let mut entry = make_entry("assistant"); + entry.message = Some(Message { + role: Some("assistant".to_string()), + content: None, + usage: Some(Usage { + input_tokens: Some(200), + output_tokens: Some(80), + cache_read_input_tokens: Some(50), + cache_creation_input_tokens: Some(10), + }), + model: Some("claude-opus-4-5".to_string()), + stop_reason: None, + }); + + let stats = compute_stats(&[entry]); + assert_eq!(stats.input_tokens, 200); + assert_eq!(stats.output_tokens, 80); + assert_eq!(stats.cache_read_tokens, 50); + assert_eq!(stats.cache_creation_tokens, 10); + assert_eq!(stats.assistant_message_count, 1); + assert_eq!(stats.model.as_deref(), Some("claude-opus-4-5")); + } + + /// An assistant entry with tool_use blocks increments tool call counts. + #[test] + fn assistant_tool_use_counts() { + let mut entry = make_entry("assistant"); + entry.message = Some(Message { + role: Some("assistant".to_string()), + content: Some(MessageContent::Blocks(vec![ + ContentBlock::ToolUse { + id: "t1".to_string(), + name: "Bash".to_string(), + input: serde_json::Value::Null, + }, + ContentBlock::ToolUse { + id: "t2".to_string(), + name: "Read".to_string(), + input: serde_json::Value::Null, + }, + ContentBlock::ToolUse { + id: "t3".to_string(), + name: "Bash".to_string(), + input: serde_json::Value::Null, + }, + ])), + usage: None, + model: None, + stop_reason: None, + }); + + let stats = compute_stats(&[entry]); + assert_eq!(stats.tool_calls.get("Bash"), Some(&2)); + assert_eq!(stats.tool_calls.get("Read"), Some(&1)); + } + + /// Multiple entries with `duration_ms` are summed correctly. + #[test] + fn duration_summed_across_entries() { + let mut e1 = make_entry("system"); + e1.duration_ms = Some(1000); + let mut e2 = make_entry("system"); + e2.duration_ms = Some(2500); + + let stats = compute_stats(&[e1, e2]); + assert_eq!(stats.duration_ms, 3500); + } +}