feat(models): define Rust types for JSONL schema [claudbg-uls1]

Add src/models/session.rs with permissive RawEntry, Message, MessageContent (untagged), ContentBlock (tagged), Usage, SystemMessage. Add src/models/stats.rs with SessionStats and compute_stats(). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
4 months ago · 620a3571c6
parent 63e1f34778
commit 620a3571c6
4 changed files with 438 additions and 1 deletions
--- a/src/lib.rs
+++ b/src/lib.rs
@ -4,7 +4,7 @@ pub mod cli;
 // pub mod commands;
 // pub mod db;
 pub mod error;
-// pub mod models;
+pub mod models;
 // pub mod output;
 pub mod util;
--- a/src/models/mod.rs
+++ b/src/models/mod.rs
@ -0,0 +1,3 @@
 //! Data models for Claude Code session JSONL files.
 pub mod session;
 pub mod stats;
--- a/src/models/session.rs
+++ b/src/models/session.rs
@ -0,0 +1,253 @@
 //! Rust types for deserializing Claude Code JSONL session files.
 //!
 //! These types are intentionally permissive — real session files contain many
 //! undocumented fields. Unknown fields never fail deserialization (there is no
 //! `deny_unknown_fields`), and `RawEntry` additionally uses `#[serde(flatten)]`
 //! to keep them in a map instead of silently discarding them.
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
 /// One deserialized line of a JSONL session file.
 ///
 /// Deserialization is permissive — there is NO `deny_unknown_fields`. Every
 /// modeled field is optional because different entry types carry different
 /// fields, and any key not modeled here is kept in [`RawEntry::extra`].
 ///
 /// Multi-word keys are accepted in both `snake_case` and `camelCase`
 /// spelling; serialization always emits the `snake_case` name.
 /// NOTE(review): the `camelCase` aliases assume the log envelope uses that
 /// casing (as unmodeled keys such as `permissionMode` do) — confirm against
 /// real session files.
 ///
 /// `Default` yields an entry with every field unset and an empty `extra` map,
 /// so callers can build entries with `..Default::default()`.
 #[derive(Debug, Clone, Default, Serialize, Deserialize)]
 pub struct RawEntry {
    /// Entry type: `"user"`, `"assistant"`, `"system"`, `"progress"`,
    /// `"file-history-snapshot"`, etc.
    #[serde(rename = "type")]
    pub entry_type: Option<String>,
    /// UUID of this session.
    #[serde(alias = "sessionId")]
    pub session_id: Option<String>,
    /// UUID of the parent agent, if this is a sub-agent run.
    #[serde(alias = "parentSessionId")]
    pub parent_session_id: Option<String>,
    /// The message payload (present on user/assistant entries).
    pub message: Option<Message>,
    /// System message payload.
    #[serde(alias = "systemMessage")]
    pub system_message: Option<SystemMessage>,
    /// Current working directory (present on first system entry).
    pub cwd: Option<String>,
    /// Timestamp string (ISO 8601).
    pub timestamp: Option<String>,
    /// Duration of the conversation turn, in milliseconds (a length of time,
    /// not a Unix timestamp).
    #[serde(alias = "durationMs")]
    pub duration_ms: Option<u64>,
    /// Raw JSON for any fields this struct doesn't explicitly model.
    #[serde(flatten)]
    pub extra: std::collections::HashMap<String, Value>,
 }
 /// The `message` payload of a user or assistant entry.
 ///
 /// Every field is optional, so a payload that omits any of them still
 /// deserializes.
 #[derive(Clone, Debug, Deserialize, Serialize)]
 pub struct Message {
    /// Speaker role: `"user"` or `"assistant"`.
    pub role: Option<String>,
    /// Body of the message — a bare string or a list of content blocks.
    pub content: Option<MessageContent>,
    /// Token accounting (present on assistant messages).
    pub usage: Option<Usage>,
    /// Identifier of the model (e.g. `"claude-opus-4-5-20251001"`).
    pub model: Option<String>,
    /// API stop reason (e.g. `"end_turn"`, `"tool_use"`).
    pub stop_reason: Option<String>,
 }
 /// Body of a message: a list of typed blocks or a bare text string.
 ///
 /// The enum is untagged, so variants are attempted in declaration order:
 /// `Blocks` is tried first and `Text` is the fallback. Do not reorder them.
 #[derive(Clone, Debug, Deserialize, Serialize)]
 #[serde(untagged)]
 pub enum MessageContent {
    /// A JSON array of structured content blocks.
    Blocks(Vec<ContentBlock>),
    /// A plain JSON string.
    Text(String),
 }
 /// One block inside a message's content array.
 ///
 /// The variant is selected by the block's `"type"` key, matched against the
 /// `snake_case` form of the variant name (`text`, `thinking`, `tool_use`,
 /// `tool_result`, `image`). Any other tag maps to [`ContentBlock::Unknown`].
 #[derive(Clone, Debug, Deserialize, Serialize)]
 #[serde(rename_all = "snake_case", tag = "type")]
 pub enum ContentBlock {
    /// Plain text output.
    Text {
        /// The text itself.
        text: String,
    },
    /// Extended thinking block.
    Thinking {
        /// The thinking text.
        thinking: String,
    },
    /// A request to run a tool.
    ToolUse {
        /// Unique ID of this tool call.
        id: String,
        /// Name of the tool (e.g. `"Bash"`, `"Read"`, `"Write"`).
        name: String,
        /// Arguments of the call, kept as raw JSON.
        input: Value,
    },
    /// The outcome of a tool call.
    ToolResult {
        /// ID of the `tool_use` block this result answers.
        tool_use_id: String,
        /// Result payload (string or nested blocks), kept as raw JSON.
        content: Option<Value>,
        /// `true` if the tool returned an error.
        is_error: Option<bool>,
    },
    /// An image block.
    Image {
        /// Details of the image source, kept as raw JSON.
        source: Option<Value>,
    },
    /// Fallback for block types not modeled above; the block's data is not kept.
    #[serde(other)]
    Unknown,
 }
 /// Token accounting attached to an assistant message.
 ///
 /// Each counter is optional; `Default` leaves all of them unset.
 #[derive(Clone, Debug, Default, Deserialize, Serialize)]
 pub struct Usage {
    /// Tokens in the input prompt.
    pub input_tokens: Option<u64>,
    /// Tokens in the generated output.
    pub output_tokens: Option<u64>,
    /// Tokens read from the cache.
    pub cache_read_input_tokens: Option<u64>,
    /// Tokens written to the cache.
    pub cache_creation_input_tokens: Option<u64>,
 }
 /// Metadata payload of a system-level entry.
 #[derive(Clone, Debug, Deserialize, Serialize)]
 pub struct SystemMessage {
    /// Kind of system message (e.g. `"turn_duration"`); read from the JSON
    /// key `type`.
    #[serde(rename = "type")]
    pub msg_type: Option<String>,
    /// Duration in milliseconds (for `turn_duration` entries).
    pub duration_ms: Option<u64>,
 }
 #[cfg(test)]
 mod tests {
    use super::*;

    /// `thinking` and `text` blocks of an assistant message parse in order,
    /// together with the message's role, model and usage.
    #[test]
    fn parse_assistant_text_and_thinking() {
        let json = r#"{
            "type": "assistant",
            "session_id": "abc123",
            "message": {
                "role": "assistant",
                "content": [
                    {"type": "thinking", "thinking": "Let me think about this."},
                    {"type": "text", "text": "Here is my answer."}
                ],
                "usage": {
                    "input_tokens": 100,
                    "output_tokens": 50
                },
                "model": "claude-opus-4-5"
            }
        }"#;
        let entry: RawEntry = serde_json::from_str(json).expect("should parse");
        assert_eq!(entry.entry_type.as_deref(), Some("assistant"));

        let msg = entry.message.expect("should have message");
        assert_eq!(msg.role.as_deref(), Some("assistant"));
        assert_eq!(msg.model.as_deref(), Some("claude-opus-4-5"));

        let usage = msg.usage.expect("should have usage");
        assert_eq!(usage.input_tokens, Some(100));
        assert_eq!(usage.output_tokens, Some(50));

        match msg.content {
            Some(MessageContent::Blocks(blocks)) => {
                assert_eq!(blocks.len(), 2);
                assert!(matches!(blocks[0], ContentBlock::Thinking { .. }));
                assert!(matches!(blocks[1], ContentBlock::Text { .. }));
            }
            _ => panic!("expected Blocks content"),
        }
    }

    /// A `tool_result` block inside a user message parses with its id and
    /// error flag intact.
    #[test]
    fn parse_user_tool_result() {
        let json = r#"{
            "type": "user",
            "session_id": "def456",
            "message": {
                "role": "user",
                "content": [
                    {
                        "type": "tool_result",
                        "tool_use_id": "toolu_01",
                        "content": "command output here",
                        "is_error": false
                    }
                ]
            }
        }"#;
        let entry: RawEntry = serde_json::from_str(json).expect("should parse");
        let msg = entry.message.expect("should have message");

        match msg.content {
            Some(MessageContent::Blocks(blocks)) => {
                assert_eq!(blocks.len(), 1);
                match &blocks[0] {
                    ContentBlock::ToolResult {
                        tool_use_id,
                        is_error,
                        ..
                    } => {
                        assert_eq!(tool_use_id, "toolu_01");
                        assert_eq!(*is_error, Some(false));
                    }
                    _ => panic!("expected ToolResult block"),
                }
            }
            _ => panic!("expected Blocks content"),
        }
    }

    /// Keys that `RawEntry` does not model are accepted and land in `extra`.
    #[test]
    fn raw_entry_unknown_fields_ok() {
        let json = r#"{
            "type": "progress",
            "session_id": "ghi789",
            "permissionMode": "auto",
            "agentId": "some-agent-id",
            "planContent": {"steps": []},
            "unknownFutureField": true
        }"#;
        let entry: RawEntry =
            serde_json::from_str(json).expect("should parse despite unknown fields");
        assert_eq!(entry.entry_type.as_deref(), Some("progress"));

        // Unmodeled keys must be retained by the flattened map.
        for key in ["permissionMode", "agentId"] {
            assert!(entry.extra.contains_key(key));
        }
    }

    /// String-valued `content` becomes `MessageContent::Text`.
    #[test]
    fn parse_message_content_as_string() {
        let json = r#"{
            "type": "user",
            "session_id": "jkl012",
            "message": {
                "role": "user",
                "content": "Hello, Claude!"
            }
        }"#;
        let entry: RawEntry = serde_json::from_str(json).expect("should parse");
        let msg = entry.message.expect("should have message");

        match msg.content {
            Some(MessageContent::Text(text)) => assert_eq!(text, "Hello, Claude!"),
            _ => panic!("expected Text content"),
        }
    }
 }
--- a/src/models/stats.rs
+++ b/src/models/stats.rs
@ -0,0 +1,181 @@
 //! Session statistics computed from raw JSONL entries.
 use std::collections::HashMap;
 use crate::models::session::{ContentBlock, MessageContent, RawEntry};
 /// Aggregated statistics for a session.
 ///
 /// `Default` produces all-zero counters, an empty tool map and no model.
 /// The type is `PartialEq`/`Eq`, so two results can be compared as a whole
 /// (for example with `assert_eq!`).
 #[derive(Debug, Clone, Default, PartialEq, Eq)]
 pub struct SessionStats {
    /// Total input tokens across all assistant messages.
    pub input_tokens: u64,
    /// Total output tokens across all assistant messages.
    pub output_tokens: u64,
    /// Total cache-read tokens across all assistant messages.
    pub cache_read_tokens: u64,
    /// Total cache-creation tokens across all assistant messages.
    pub cache_creation_tokens: u64,
    /// Tool call counts by tool name.
    pub tool_calls: HashMap<String, u64>,
    /// Total duration in milliseconds (from system `turn_duration` entries).
    pub duration_ms: u64,
    /// Model identifier from the first assistant message that has one.
    pub model: Option<String>,
    /// Total number of user messages.
    pub user_message_count: u64,
    /// Total number of assistant messages.
    pub assistant_message_count: u64,
 }
 /// Adds `amount` (treated as 0 when absent) to `total`, clamping at
 /// `u64::MAX` so a corrupt or huge value cannot overflow the running sum.
 fn add_saturating(total: &mut u64, amount: Option<u64>) {
    *total = total.saturating_add(amount.unwrap_or(0));
 }

 /// Compute session statistics from a slice of raw entries.
 ///
 /// Entries are dispatched on their `entry_type`:
 ///
 /// * `"assistant"` entries that carry a message bump the assistant count,
 ///   add their usage counters to the token totals, supply the model (the
 ///   first message that has one wins) and count each `tool_use` block by
 ///   tool name.
 /// * `"user"` entries that carry a message bump the user count.
 /// * `"system"` entries contribute their duration: the entry-level
 ///   `duration_ms` if set, otherwise the one inside `system_message`. Only
 ///   one of the two is ever counted for a given entry.
 /// * Every other entry type (or a missing type) is ignored.
 ///
 /// All sums saturate at `u64::MAX` instead of overflowing. An empty slice
 /// yields `SessionStats::default()`.
 pub fn compute_stats(entries: &[RawEntry]) -> SessionStats {
    let mut stats = SessionStats::default();
    for entry in entries {
        match entry.entry_type.as_deref() {
            Some("assistant") => {
                if let Some(msg) = &entry.message {
                    stats.assistant_message_count += 1;
                    // Accumulate usage tokens.
                    if let Some(usage) = &msg.usage {
                        add_saturating(&mut stats.input_tokens, usage.input_tokens);
                        add_saturating(&mut stats.output_tokens, usage.output_tokens);
                        add_saturating(&mut stats.cache_read_tokens, usage.cache_read_input_tokens);
                        add_saturating(
                            &mut stats.cache_creation_tokens,
                            usage.cache_creation_input_tokens,
                        );
                    }
                    // Capture model from first assistant message that has one.
                    if stats.model.is_none() {
                        stats.model = msg.model.clone();
                    }
                    // Count tool_use blocks.
                    if let Some(MessageContent::Blocks(blocks)) = &msg.content {
                        for block in blocks {
                            if let ContentBlock::ToolUse { name, .. } = block {
                                *stats.tool_calls.entry(name.clone()).or_default() += 1;
                            }
                        }
                    }
                }
            }
            Some("user") => {
                if entry.message.is_some() {
                    stats.user_message_count += 1;
                }
            }
            Some("system") => {
                // Prefer the entry-level duration; fall back to the nested
                // system-message duration so it is not silently dropped.
                // `or_else` guarantees an entry is never counted twice.
                let duration = entry
                    .duration_ms
                    .or_else(|| entry.system_message.as_ref().and_then(|m| m.duration_ms));
                add_saturating(&mut stats.duration_ms, duration);
            }
            _ => {}
        }
    }
    stats
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use crate::models::session::{Message, MessageContent, RawEntry, Usage};

    /// Builds an entry of the given type with every other field unset.
    fn make_entry(entry_type: &str) -> RawEntry {
        RawEntry {
            entry_type: Some(entry_type.to_owned()),
            session_id: None,
            parent_session_id: None,
            message: None,
            system_message: None,
            cwd: None,
            timestamp: None,
            duration_ms: None,
            extra: Default::default(),
        }
    }

    /// Builds an `"assistant"` entry whose message has the given content,
    /// usage and model, and no stop reason.
    fn assistant_entry(
        content: Option<MessageContent>,
        usage: Option<Usage>,
        model: Option<&str>,
    ) -> RawEntry {
        RawEntry {
            message: Some(Message {
                role: Some(String::from("assistant")),
                content,
                usage,
                model: model.map(String::from),
                stop_reason: None,
            }),
            ..make_entry("assistant")
        }
    }

    /// No entries means zeroed counters, no model and no tool calls.
    #[test]
    fn empty_entries_all_zeros() {
        let stats = compute_stats(&[]);
        let counters = [
            stats.input_tokens,
            stats.output_tokens,
            stats.cache_read_tokens,
            stats.cache_creation_tokens,
            stats.duration_ms,
            stats.user_message_count,
            stats.assistant_message_count,
        ];
        assert!(counters.iter().all(|&value| value == 0));
        assert!(stats.model.is_none());
        assert!(stats.tool_calls.is_empty());
    }

    /// Usage on a single assistant entry is carried into the token totals.
    #[test]
    fn single_assistant_usage() {
        let usage = Usage {
            input_tokens: Some(200),
            output_tokens: Some(80),
            cache_read_input_tokens: Some(50),
            cache_creation_input_tokens: Some(10),
        };
        let entries = [assistant_entry(None, Some(usage), Some("claude-opus-4-5"))];

        let stats = compute_stats(&entries);
        assert_eq!(stats.input_tokens, 200);
        assert_eq!(stats.output_tokens, 80);
        assert_eq!(stats.cache_read_tokens, 50);
        assert_eq!(stats.cache_creation_tokens, 10);
        assert_eq!(stats.assistant_message_count, 1);
        assert_eq!(stats.model.as_deref(), Some("claude-opus-4-5"));
    }

    /// Each `tool_use` block increments the count for its tool name.
    #[test]
    fn assistant_tool_use_counts() {
        let blocks: Vec<ContentBlock> = [("t1", "Bash"), ("t2", "Read"), ("t3", "Bash")]
            .iter()
            .map(|&(id, name)| ContentBlock::ToolUse {
                id: id.to_string(),
                name: name.to_string(),
                input: serde_json::Value::Null,
            })
            .collect();
        let entries = [assistant_entry(
            Some(MessageContent::Blocks(blocks)),
            None,
            None,
        )];

        let stats = compute_stats(&entries);
        assert_eq!(stats.tool_calls.get("Bash"), Some(&2));
        assert_eq!(stats.tool_calls.get("Read"), Some(&1));
    }

    /// `duration_ms` values of several system entries add up.
    #[test]
    fn duration_summed_across_entries() {
        let entries: Vec<RawEntry> = [1000, 2500]
            .iter()
            .map(|&millis| RawEntry {
                duration_ms: Some(millis),
                ..make_entry("system")
            })
            .collect();

        let stats = compute_stats(&entries);
        assert_eq!(stats.duration_ms, 3500);
    }
 }