feat(models): define Rust types for JSONL schema [claudbg-uls1]

Add src/models/session.rs with permissive RawEntry, Message,
MessageContent (untagged), ContentBlock (tagged), Usage, SystemMessage.
Add src/models/stats.rs with SessionStats and compute_stats().

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
main
Elijah Voigt 2 months ago
parent 63e1f34778
commit 620a3571c6

@ -4,7 +4,7 @@ pub mod cli;
// pub mod commands; // pub mod commands;
// pub mod db; // pub mod db;
pub mod error; pub mod error;
// pub mod models; pub mod models;
// pub mod output; // pub mod output;
pub mod util; pub mod util;

@ -0,0 +1,3 @@
//! Data models for Claude Code session JSONL files.
pub mod session;
pub mod stats;

@ -0,0 +1,253 @@
//! Rust types for deserializing Claude Code JSONL session files.
//!
//! These types are intentionally permissive — real session files contain many
//! undocumented fields, so `RawEntry` uses `#[serde(flatten)]` to capture
//! unknown fields rather than failing deserialization.
use serde::{Deserialize, Serialize};
use serde_json::Value;
/// One line of a JSONL session file, deserialized leniently.
///
/// `deny_unknown_fields` is deliberately NOT applied. Every modelled field is
/// an `Option` because different entry types carry different keys, and any key
/// without a dedicated field is collected into [`RawEntry::extra`] instead of
/// failing deserialization.
///
/// NOTE(review): apart from `type`, keys are matched verbatim in snake_case.
/// The fixtures in this file spell unmodelled keys in camelCase
/// (`permissionMode`, `agentId`) — confirm that the modelled keys really appear
/// in snake_case in real files; a camelCase spelling would land in `extra`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RawEntry {
    /// Kind of entry, read from the JSON key `type`: `"user"`, `"assistant"`,
    /// `"system"`, `"progress"`, `"file-history-snapshot"`, etc.
    #[serde(rename = "type")]
    pub entry_type: Option<String>,

    /// UUID identifying the session this entry belongs to.
    pub session_id: Option<String>,

    /// UUID of the parent agent when this entry comes from a sub-agent run.
    pub parent_session_id: Option<String>,

    /// Message payload; present on user and assistant entries.
    pub message: Option<Message>,

    /// Payload of a system message.
    pub system_message: Option<SystemMessage>,

    /// Current working directory (present on the first system entry).
    pub cwd: Option<String>,

    /// Timestamp as an ISO 8601 string; kept as text, not parsed.
    pub timestamp: Option<String>,

    /// Duration of the conversation turn, in milliseconds.
    pub duration_ms: Option<u64>,

    /// Every top-level key not claimed by a field above, as raw JSON.
    #[serde(flatten)]
    pub extra: std::collections::HashMap<String, Value>,
}
/// The `message` payload of a user or assistant session entry.
///
/// All fields are optional so that a payload missing any of them still
/// deserializes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Message {
    /// Speaker of the message: `"user"` or `"assistant"`.
    pub role: Option<String>,

    /// Body of the message — a bare string or a list of content blocks
    /// (see [`MessageContent`]).
    pub content: Option<MessageContent>,

    /// Token accounting; present on assistant messages.
    pub usage: Option<Usage>,

    /// Identifier of the model, e.g. `"claude-opus-4-5-20251001"`.
    pub model: Option<String>,

    /// Why the API stopped generating, e.g. `"end_turn"` or `"tool_use"`.
    pub stop_reason: Option<String>,
}
/// Body of a message: a list of typed blocks or one plain string.
///
/// The enum is `untagged`, so serde attempts the variants in declaration
/// order: `Blocks` is tried first and `Text` is the fallback.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum MessageContent {
    /// A JSON array of structured content blocks.
    Blocks(Vec<ContentBlock>),

    /// A JSON string holding plain text.
    Text(String),
}
/// One element of a message's content array.
///
/// Internally tagged on the JSON key `type`; variant names are matched in
/// snake_case (`text`, `thinking`, `tool_use`, `tool_result`, `image`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContentBlock {
    /// Plain text output.
    Text {
        /// The text itself.
        text: String,
    },

    /// An extended-thinking block.
    Thinking {
        /// The thinking text.
        thinking: String,
    },

    /// A request to invoke a tool.
    ToolUse {
        /// Unique identifier of this tool call.
        id: String,
        /// Name of the tool, e.g. `"Bash"`, `"Read"`, `"Write"`.
        name: String,
        /// Arguments of the call, kept as raw JSON.
        input: Value,
    },

    /// The outcome of a tool invocation.
    ToolResult {
        /// The `id` of the `tool_use` block this result answers.
        tool_use_id: String,
        /// Result payload (a string or nested blocks), kept as raw JSON.
        content: Option<Value>,
        /// `true` when the tool reported an error.
        is_error: Option<bool>,
    },

    /// An image block.
    Image {
        /// Details of the image source, kept as raw JSON.
        source: Option<Value>,
    },

    /// Fallback for any `type` tag not listed above, as seen in real data.
    /// This is a unit variant, so the block's other fields are not retained.
    #[serde(other)]
    Unknown,
}
/// Token counts reported for one assistant message.
///
/// Each counter is optional; `Default` yields a value with every counter unset.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Usage {
    /// Number of tokens in the input prompt.
    pub input_tokens: Option<u64>,

    /// Number of tokens in the generated output.
    pub output_tokens: Option<u64>,

    /// Number of tokens read from the cache.
    pub cache_read_input_tokens: Option<u64>,

    /// Number of tokens written to the cache.
    pub cache_creation_input_tokens: Option<u64>,
}
/// Metadata carried by a system-level message.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemMessage {
    /// Kind of system message, read from the JSON key `type`
    /// (e.g. `"turn_duration"`).
    #[serde(rename = "type")]
    pub msg_type: Option<String>,

    /// Duration in milliseconds, for `turn_duration` entries.
    pub duration_ms: Option<u64>,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `thinking` and `text` blocks of an assistant message are both decoded,
    /// together with the role, model and usage counters.
    #[test]
    fn parse_assistant_text_and_thinking() {
        let json = r#"{
"type": "assistant",
"session_id": "abc123",
"message": {
"role": "assistant",
"content": [
{"type": "thinking", "thinking": "Let me think about this."},
{"type": "text", "text": "Here is my answer."}
],
"usage": {
"input_tokens": 100,
"output_tokens": 50
},
"model": "claude-opus-4-5"
}
}"#;
        let entry: RawEntry = serde_json::from_str(json).expect("should parse");
        assert_eq!(entry.entry_type.as_deref(), Some("assistant"));

        let message = entry.message.expect("should have message");
        assert_eq!(message.role.as_deref(), Some("assistant"));
        assert_eq!(message.model.as_deref(), Some("claude-opus-4-5"));

        let usage = message.usage.expect("should have usage");
        assert_eq!(usage.input_tokens, Some(100));
        assert_eq!(usage.output_tokens, Some(50));

        match message.content {
            Some(MessageContent::Blocks(blocks)) => {
                assert_eq!(blocks.len(), 2);
                assert!(matches!(blocks[0], ContentBlock::Thinking { .. }));
                assert!(matches!(blocks[1], ContentBlock::Text { .. }));
            }
            _ => panic!("expected Blocks content"),
        }
    }

    /// A `tool_result` block inside a user message is decoded with its id and
    /// error flag.
    #[test]
    fn parse_user_tool_result() {
        let json = r#"{
"type": "user",
"session_id": "def456",
"message": {
"role": "user",
"content": [
{
"type": "tool_result",
"tool_use_id": "toolu_01",
"content": "command output here",
"is_error": false
}
]
}
}"#;
        let entry: RawEntry = serde_json::from_str(json).expect("should parse");
        let message = entry.message.expect("should have message");

        let blocks = match message.content {
            Some(MessageContent::Blocks(blocks)) => blocks,
            _ => panic!("expected Blocks content"),
        };
        assert_eq!(blocks.len(), 1);

        match &blocks[0] {
            ContentBlock::ToolResult {
                tool_use_id,
                is_error,
                ..
            } => {
                assert_eq!(tool_use_id, "toolu_01");
                assert_eq!(*is_error, Some(false));
            }
            _ => panic!("expected ToolResult block"),
        }
    }

    /// Keys that `RawEntry` does not model are accepted and land in `extra`.
    #[test]
    fn raw_entry_unknown_fields_ok() {
        let json = r#"{
"type": "progress",
"session_id": "ghi789",
"permissionMode": "auto",
"agentId": "some-agent-id",
"planContent": {"steps": []},
"unknownFutureField": true
}"#;
        let entry: RawEntry =
            serde_json::from_str(json).expect("should parse despite unknown fields");
        assert_eq!(entry.entry_type.as_deref(), Some("progress"));

        // The unmodelled keys must have been collected by the flattened map.
        for key in ["permissionMode", "agentId"].iter() {
            assert!(entry.extra.contains_key(*key));
        }
    }

    /// A bare-string `content` decodes to `MessageContent::Text`.
    #[test]
    fn parse_message_content_as_string() {
        let json = r#"{
"type": "user",
"session_id": "jkl012",
"message": {
"role": "user",
"content": "Hello, Claude!"
}
}"#;
        let entry: RawEntry = serde_json::from_str(json).expect("should parse");
        let message = entry.message.expect("should have message");

        match message.content {
            Some(MessageContent::Text(text)) => assert_eq!(text, "Hello, Claude!"),
            _ => panic!("expected Text content"),
        }
    }
}

@ -0,0 +1,181 @@
//! Session statistics computed from raw JSONL entries.
use std::collections::HashMap;
use crate::models::session::{ContentBlock, MessageContent, RawEntry};
/// Totals gathered over all entries of one session.
///
/// `Default` gives zeroed counters, an empty tool map and no model.
#[derive(Debug, Clone, Default)]
pub struct SessionStats {
    /// Sum of input tokens over all assistant messages.
    pub input_tokens: u64,

    /// Sum of output tokens over all assistant messages.
    pub output_tokens: u64,

    /// Sum of cache-read tokens over all assistant messages.
    pub cache_read_tokens: u64,

    /// Sum of cache-creation tokens over all assistant messages.
    pub cache_creation_tokens: u64,

    /// Number of tool calls, keyed by tool name.
    pub tool_calls: HashMap<String, u64>,

    /// Summed duration in milliseconds (from system `turn_duration` entries).
    pub duration_ms: u64,

    /// Model identifier taken from the first assistant message that has one.
    pub model: Option<String>,

    /// Number of user messages.
    pub user_message_count: u64,

    /// Number of assistant messages.
    pub assistant_message_count: u64,
}
/// Adds `delta` (with `None` counting as zero) to `total`, clamping at `u64::MAX`.
fn saturating_accumulate(total: &mut u64, delta: Option<u64>) {
    *total = total.saturating_add(delta.unwrap_or(0));
}

/// Compute session statistics from a slice of raw entries.
///
/// Entries are dispatched on their `entry_type`:
///
/// * `"assistant"` entries that carry a message bump `assistant_message_count`,
///   add their usage counters to the token totals, supply `model` (the first
///   message with a model wins) and add one to `tool_calls[name]` for every
///   `tool_use` block in a block-array content.
/// * `"user"` entries that carry a message bump `user_message_count`.
/// * `"system"` entries add their top-level `duration_ms` to the total.
/// * Every other entry (including ones without a type) is ignored.
///
/// Token and duration totals saturate at `u64::MAX` rather than overflowing:
/// the values come straight from the session file, so an absurd number in a
/// corrupt file must not panic (debug builds) or wrap around (release builds).
///
/// An empty slice yields `SessionStats::default()`.
pub fn compute_stats(entries: &[RawEntry]) -> SessionStats {
    let mut stats = SessionStats::default();

    for entry in entries {
        match entry.entry_type.as_deref() {
            Some("assistant") => {
                // An assistant entry without a message contributes nothing.
                if let Some(msg) = &entry.message {
                    stats.assistant_message_count += 1;

                    // Accumulate usage tokens; absent counters count as zero.
                    if let Some(usage) = &msg.usage {
                        saturating_accumulate(&mut stats.input_tokens, usage.input_tokens);
                        saturating_accumulate(&mut stats.output_tokens, usage.output_tokens);
                        saturating_accumulate(
                            &mut stats.cache_read_tokens,
                            usage.cache_read_input_tokens,
                        );
                        saturating_accumulate(
                            &mut stats.cache_creation_tokens,
                            usage.cache_creation_input_tokens,
                        );
                    }

                    // Capture the model from the first assistant message that has one.
                    if stats.model.is_none() {
                        stats.model = msg.model.clone();
                    }

                    // Count tool_use blocks; plain-string content has none.
                    if let Some(MessageContent::Blocks(blocks)) = &msg.content {
                        for block in blocks {
                            if let ContentBlock::ToolUse { name, .. } = block {
                                *stats.tool_calls.entry(name.clone()).or_default() += 1;
                            }
                        }
                    }
                }
            }
            Some("user") => {
                if entry.message.is_some() {
                    stats.user_message_count += 1;
                }
            }
            Some("system") => {
                // NOTE(review): only the entry's top-level `duration_ms` is read.
                // `system_message.duration_ms` is ignored and the entry is not
                // checked to be a `turn_duration` one — confirm which field
                // real session files populate.
                saturating_accumulate(&mut stats.duration_ms, entry.duration_ms);
            }
            _ => {}
        }
    }

    stats
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::session::{Message, MessageContent, RawEntry, Usage};

    /// Builds an entry of the given type with every other field unset or empty.
    fn make_entry(entry_type: &str) -> RawEntry {
        RawEntry {
            entry_type: Some(entry_type.to_owned()),
            session_id: None,
            parent_session_id: None,
            message: None,
            system_message: None,
            cwd: None,
            timestamp: None,
            duration_ms: None,
            extra: Default::default(),
        }
    }

    /// With no entries at all, every counter is zero, no model is recorded and
    /// the tool map is empty.
    #[test]
    fn empty_entries_all_zeros() {
        let stats = compute_stats(&[]);

        assert_eq!(stats.input_tokens, 0);
        assert_eq!(stats.output_tokens, 0);
        assert_eq!(stats.cache_read_tokens, 0);
        assert_eq!(stats.cache_creation_tokens, 0);
        assert_eq!(stats.duration_ms, 0);
        assert_eq!(stats.user_message_count, 0);
        assert_eq!(stats.assistant_message_count, 0);
        assert!(stats.model.is_none());
        assert!(stats.tool_calls.is_empty());
    }

    /// The usage counters and model of a single assistant entry are carried
    /// over into the stats unchanged.
    #[test]
    fn single_assistant_usage() {
        let usage = Usage {
            input_tokens: Some(200),
            output_tokens: Some(80),
            cache_read_input_tokens: Some(50),
            cache_creation_input_tokens: Some(10),
        };
        let message = Message {
            role: Some("assistant".to_string()),
            content: None,
            usage: Some(usage),
            model: Some("claude-opus-4-5".to_string()),
            stop_reason: None,
        };
        let entry = RawEntry {
            message: Some(message),
            ..make_entry("assistant")
        };

        let stats = compute_stats(&[entry]);

        assert_eq!(stats.input_tokens, 200);
        assert_eq!(stats.output_tokens, 80);
        assert_eq!(stats.cache_read_tokens, 50);
        assert_eq!(stats.cache_creation_tokens, 10);
        assert_eq!(stats.assistant_message_count, 1);
        assert_eq!(stats.model.as_deref(), Some("claude-opus-4-5"));
    }

    /// Each `tool_use` block of an assistant entry is tallied under its tool name.
    #[test]
    fn assistant_tool_use_counts() {
        let tool_use = |id: &str, name: &str| ContentBlock::ToolUse {
            id: id.to_string(),
            name: name.to_string(),
            input: serde_json::Value::Null,
        };
        let message = Message {
            role: Some("assistant".to_string()),
            content: Some(MessageContent::Blocks(vec![
                tool_use("t1", "Bash"),
                tool_use("t2", "Read"),
                tool_use("t3", "Bash"),
            ])),
            usage: None,
            model: None,
            stop_reason: None,
        };
        let entry = RawEntry {
            message: Some(message),
            ..make_entry("assistant")
        };

        let stats = compute_stats(&[entry]);

        assert_eq!(stats.tool_calls.get("Bash"), Some(&2));
        assert_eq!(stats.tool_calls.get("Read"), Some(&1));
    }

    /// The `duration_ms` values of several system entries are added together.
    #[test]
    fn duration_summed_across_entries() {
        let entries: Vec<RawEntry> = [1000_u64, 2500]
            .iter()
            .map(|&millis| RawEntry {
                duration_ms: Some(millis),
                ..make_entry("system")
            })
            .collect();

        let stats = compute_stats(&entries);

        assert_eq!(stats.duration_ms, 3500);
    }
}
Loading…
Cancel
Save