feat(models): define Rust types for JSONL schema [claudbg-uls1]
Add src/models/session.rs with permissive RawEntry, Message, MessageContent (untagged), ContentBlock (tagged), Usage, SystemMessage. Add src/models/stats.rs with SessionStats and compute_stats().

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
main
parent
63e1f34778
commit
620a3571c6
@ -0,0 +1,3 @@
|
|||||||
|
//! Data models for Claude Code session JSONL files.
|
||||||
|
pub mod session;
|
||||||
|
pub mod stats;
|
||||||
@ -0,0 +1,253 @@
|
|||||||
|
//! Rust types for deserializing Claude Code JSONL session files.
|
||||||
|
//!
|
||||||
|
//! These types are intentionally permissive — real session files contain many
|
||||||
|
//! undocumented fields, so `RawEntry` uses `#[serde(flatten)]` to capture
|
||||||
|
//! unknown fields rather than failing deserialization.
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use serde_json::Value;
|
||||||
|
|
||||||
|
/// Raw deserialized line from a JSONL session file.
|
||||||
|
///
|
||||||
|
/// Uses permissive deserialization — NO `deny_unknown_fields`.
|
||||||
|
/// All fields are optional because different entry types have different fields.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct RawEntry {
|
||||||
|
/// Entry type: `"user"`, `"assistant"`, `"system"`, `"progress"`,
|
||||||
|
/// `"file-history-snapshot"`, etc.
|
||||||
|
#[serde(rename = "type")]
|
||||||
|
pub entry_type: Option<String>,
|
||||||
|
/// UUID of this session.
|
||||||
|
pub session_id: Option<String>,
|
||||||
|
/// UUID of the parent agent, if this is a sub-agent run.
|
||||||
|
pub parent_session_id: Option<String>,
|
||||||
|
/// The message payload (present on user/assistant entries).
|
||||||
|
pub message: Option<Message>,
|
||||||
|
/// System message payload.
|
||||||
|
pub system_message: Option<SystemMessage>,
|
||||||
|
/// Current working directory (present on first system entry).
|
||||||
|
pub cwd: Option<String>,
|
||||||
|
/// Timestamp string (ISO 8601).
|
||||||
|
pub timestamp: Option<String>,
|
||||||
|
/// Unix milliseconds of the conversation turn duration.
|
||||||
|
pub duration_ms: Option<u64>,
|
||||||
|
/// Raw JSON for any fields this struct doesn't explicitly model.
|
||||||
|
#[serde(flatten)]
|
||||||
|
pub extra: std::collections::HashMap<String, Value>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A message within a session entry (user or assistant role).
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct Message {
|
||||||
|
/// Role: `"user"` or `"assistant"`.
|
||||||
|
pub role: Option<String>,
|
||||||
|
/// Content: either a plain string or an array of content blocks.
|
||||||
|
pub content: Option<MessageContent>,
|
||||||
|
/// Token usage statistics (present on assistant messages).
|
||||||
|
pub usage: Option<Usage>,
|
||||||
|
/// Model identifier (e.g. `"claude-opus-4-5-20251001"`).
|
||||||
|
pub model: Option<String>,
|
||||||
|
/// Stop reason from the API (e.g. `"end_turn"`, `"tool_use"`).
|
||||||
|
pub stop_reason: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Message content: either a plain text string or an array of typed blocks.
|
||||||
|
///
|
||||||
|
/// Untagged: tries `Blocks` first, falls back to `Text`.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
#[serde(untagged)]
|
||||||
|
pub enum MessageContent {
|
||||||
|
/// Array of structured content blocks.
|
||||||
|
Blocks(Vec<ContentBlock>),
|
||||||
|
/// Plain text string content.
|
||||||
|
Text(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A single content block within a message.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
#[serde(tag = "type", rename_all = "snake_case")]
|
||||||
|
pub enum ContentBlock {
|
||||||
|
/// Plain text output.
|
||||||
|
Text {
|
||||||
|
/// The text content.
|
||||||
|
text: String,
|
||||||
|
},
|
||||||
|
/// Extended thinking block.
|
||||||
|
Thinking {
|
||||||
|
/// The thinking content.
|
||||||
|
thinking: String,
|
||||||
|
},
|
||||||
|
/// A tool invocation.
|
||||||
|
ToolUse {
|
||||||
|
/// Unique ID for this tool call.
|
||||||
|
id: String,
|
||||||
|
/// Tool name (e.g. `"Bash"`, `"Read"`, `"Write"`).
|
||||||
|
name: String,
|
||||||
|
/// Tool call arguments as raw JSON.
|
||||||
|
input: Value,
|
||||||
|
},
|
||||||
|
/// Result of a tool invocation.
|
||||||
|
ToolResult {
|
||||||
|
/// The `tool_use` id this result corresponds to.
|
||||||
|
tool_use_id: String,
|
||||||
|
/// Result content (string or nested blocks).
|
||||||
|
content: Option<Value>,
|
||||||
|
/// `true` if the tool returned an error.
|
||||||
|
is_error: Option<bool>,
|
||||||
|
},
|
||||||
|
/// An image block.
|
||||||
|
Image {
|
||||||
|
/// Image source details.
|
||||||
|
source: Option<Value>,
|
||||||
|
},
|
||||||
|
/// Catch-all for unknown block types encountered in real data.
|
||||||
|
#[serde(other)]
|
||||||
|
Unknown,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Token usage for an assistant message.
|
||||||
|
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||||
|
pub struct Usage {
|
||||||
|
/// Tokens in the input prompt.
|
||||||
|
pub input_tokens: Option<u64>,
|
||||||
|
/// Tokens in the generated output.
|
||||||
|
pub output_tokens: Option<u64>,
|
||||||
|
/// Tokens read from the cache.
|
||||||
|
pub cache_read_input_tokens: Option<u64>,
|
||||||
|
/// Tokens written to the cache.
|
||||||
|
pub cache_creation_input_tokens: Option<u64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// System-level message metadata.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct SystemMessage {
|
||||||
|
/// System message type (e.g. `"turn_duration"`).
|
||||||
|
#[serde(rename = "type")]
|
||||||
|
pub msg_type: Option<String>,
|
||||||
|
/// Duration in milliseconds (for `turn_duration` entries).
|
||||||
|
pub duration_ms: Option<u64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// `thinking` and `text` blocks of an assistant message are both recognised,
    /// along with the role, model and usage counters.
    #[test]
    fn parse_assistant_text_and_thinking() {
        let raw = r#"{
            "type": "assistant",
            "session_id": "abc123",
            "message": {
                "role": "assistant",
                "content": [
                    {"type": "thinking", "thinking": "Let me think about this."},
                    {"type": "text", "text": "Here is my answer."}
                ],
                "usage": {
                    "input_tokens": 100,
                    "output_tokens": 50
                },
                "model": "claude-opus-4-5"
            }
        }"#;

        let parsed: RawEntry = serde_json::from_str(raw).expect("should parse");
        assert_eq!(parsed.entry_type.as_deref(), Some("assistant"));

        let message = parsed.message.expect("should have message");
        assert_eq!(message.role.as_deref(), Some("assistant"));
        assert_eq!(message.model.as_deref(), Some("claude-opus-4-5"));

        let tokens = message.usage.expect("should have usage");
        assert_eq!(tokens.input_tokens, Some(100));
        assert_eq!(tokens.output_tokens, Some(50));

        match message.content {
            Some(MessageContent::Blocks(items)) => {
                assert_eq!(items.len(), 2);
                assert!(matches!(items[0], ContentBlock::Thinking { .. }));
                assert!(matches!(items[1], ContentBlock::Text { .. }));
            }
            _ => panic!("expected Blocks content"),
        }
    }

    /// A `tool_result` block inside a user message keeps its id and error flag.
    #[test]
    fn parse_user_tool_result() {
        let raw = r#"{
            "type": "user",
            "session_id": "def456",
            "message": {
                "role": "user",
                "content": [
                    {
                        "type": "tool_result",
                        "tool_use_id": "toolu_01",
                        "content": "command output here",
                        "is_error": false
                    }
                ]
            }
        }"#;

        let parsed: RawEntry = serde_json::from_str(raw).expect("should parse");
        let message = parsed.message.expect("should have message");

        match message.content {
            Some(MessageContent::Blocks(items)) => {
                assert_eq!(items.len(), 1);
                match &items[0] {
                    ContentBlock::ToolResult {
                        tool_use_id,
                        is_error,
                        ..
                    } => {
                        assert_eq!(tool_use_id, "toolu_01");
                        assert_eq!(*is_error, Some(false));
                    }
                    _ => panic!("expected ToolResult block"),
                }
            }
            _ => panic!("expected Blocks content"),
        }
    }

    /// Keys that `RawEntry` does not model are accepted and land in `extra`.
    #[test]
    fn raw_entry_unknown_fields_ok() {
        let raw = r#"{
            "type": "progress",
            "session_id": "ghi789",
            "permissionMode": "auto",
            "agentId": "some-agent-id",
            "planContent": {"steps": []},
            "unknownFutureField": true
        }"#;

        let parsed: RawEntry =
            serde_json::from_str(raw).expect("should parse despite unknown fields");
        assert_eq!(parsed.entry_type.as_deref(), Some("progress"));

        // The unmodelled keys must have been captured by the flattened map.
        assert!(parsed.extra.contains_key("permissionMode"));
        assert!(parsed.extra.contains_key("agentId"));
    }

    /// String-valued `content` is read as `MessageContent::Text`.
    #[test]
    fn parse_message_content_as_string() {
        let raw = r#"{
            "type": "user",
            "session_id": "jkl012",
            "message": {
                "role": "user",
                "content": "Hello, Claude!"
            }
        }"#;

        let parsed: RawEntry = serde_json::from_str(raw).expect("should parse");
        let message = parsed.message.expect("should have message");

        match message.content {
            Some(MessageContent::Text(body)) => assert_eq!(body, "Hello, Claude!"),
            _ => panic!("expected Text content"),
        }
    }
}
|
||||||
@ -0,0 +1,181 @@
|
|||||||
|
//! Session statistics computed from raw JSONL entries.
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use crate::models::session::{ContentBlock, MessageContent, RawEntry};
|
||||||
|
|
||||||
|
/// Totals gathered over all entries of one session.
///
/// `Default` gives zeroed counters, an empty tool-call map and no model.
#[derive(Debug, Clone, Default)]
pub struct SessionStats {
    /// Sum of input tokens over all assistant messages.
    pub input_tokens: u64,
    /// Sum of output tokens over all assistant messages.
    pub output_tokens: u64,
    /// Sum of cache-read tokens over all assistant messages.
    pub cache_read_tokens: u64,
    /// Sum of cache-creation tokens over all assistant messages.
    pub cache_creation_tokens: u64,
    /// Number of tool calls, keyed by tool name.
    pub tool_calls: HashMap<String, u64>,
    /// Summed duration in milliseconds (from system `turn_duration` entries).
    pub duration_ms: u64,
    /// Model identifier taken from the first assistant message that has one.
    pub model: Option<String>,
    /// Number of user messages.
    pub user_message_count: u64,
    /// Number of assistant messages.
    pub assistant_message_count: u64,
}
|
||||||
|
|
||||||
|
/// Compute session statistics from a slice of raw entries.
|
||||||
|
pub fn compute_stats(entries: &[RawEntry]) -> SessionStats {
|
||||||
|
let mut stats = SessionStats::default();
|
||||||
|
for entry in entries {
|
||||||
|
match entry.entry_type.as_deref() {
|
||||||
|
Some("assistant") => {
|
||||||
|
if let Some(msg) = &entry.message {
|
||||||
|
stats.assistant_message_count += 1;
|
||||||
|
// Accumulate usage tokens.
|
||||||
|
if let Some(usage) = &msg.usage {
|
||||||
|
stats.input_tokens += usage.input_tokens.unwrap_or(0);
|
||||||
|
stats.output_tokens += usage.output_tokens.unwrap_or(0);
|
||||||
|
stats.cache_read_tokens += usage.cache_read_input_tokens.unwrap_or(0);
|
||||||
|
stats.cache_creation_tokens +=
|
||||||
|
usage.cache_creation_input_tokens.unwrap_or(0);
|
||||||
|
}
|
||||||
|
// Capture model from first assistant message that has one.
|
||||||
|
if stats.model.is_none() {
|
||||||
|
stats.model = msg.model.clone();
|
||||||
|
}
|
||||||
|
// Count tool_use blocks.
|
||||||
|
if let Some(MessageContent::Blocks(blocks)) = &msg.content {
|
||||||
|
for block in blocks {
|
||||||
|
if let ContentBlock::ToolUse { name, .. } = block {
|
||||||
|
*stats.tool_calls.entry(name.clone()).or_insert(0) += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Some("user") => {
|
||||||
|
if entry.message.is_some() {
|
||||||
|
stats.user_message_count += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Some("system") => {
|
||||||
|
// Accumulate duration from `turn_duration` system entries.
|
||||||
|
if let Some(dur) = entry.duration_ms {
|
||||||
|
stats.duration_ms += dur;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stats
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::session::{Message, MessageContent, RawEntry, Usage};

    /// Builds an entry of the given type with every other field empty.
    fn make_entry(entry_type: &str) -> RawEntry {
        RawEntry {
            entry_type: Some(entry_type.to_string()),
            session_id: None,
            parent_session_id: None,
            message: None,
            system_message: None,
            cwd: None,
            timestamp: None,
            duration_ms: None,
            extra: Default::default(),
        }
    }

    /// Builds an assistant entry whose message carries only the given model.
    fn assistant_with_model(model: Option<&str>) -> RawEntry {
        let mut entry = make_entry("assistant");
        entry.message = Some(Message {
            role: Some("assistant".to_string()),
            content: None,
            usage: None,
            model: model.map(str::to_string),
            stop_reason: None,
        });
        entry
    }

    /// Empty entry slice produces all-zero stats with no model.
    #[test]
    fn empty_entries_all_zeros() {
        let stats = compute_stats(&[]);
        assert_eq!(stats.input_tokens, 0);
        assert_eq!(stats.output_tokens, 0);
        assert_eq!(stats.cache_read_tokens, 0);
        assert_eq!(stats.cache_creation_tokens, 0);
        assert_eq!(stats.duration_ms, 0);
        assert_eq!(stats.user_message_count, 0);
        assert_eq!(stats.assistant_message_count, 0);
        assert!(stats.model.is_none());
        assert!(stats.tool_calls.is_empty());
    }

    /// A single assistant entry with usage produces correct token counts.
    #[test]
    fn single_assistant_usage() {
        let mut entry = make_entry("assistant");
        entry.message = Some(Message {
            role: Some("assistant".to_string()),
            content: None,
            usage: Some(Usage {
                input_tokens: Some(200),
                output_tokens: Some(80),
                cache_read_input_tokens: Some(50),
                cache_creation_input_tokens: Some(10),
            }),
            model: Some("claude-opus-4-5".to_string()),
            stop_reason: None,
        });

        let stats = compute_stats(&[entry]);
        assert_eq!(stats.input_tokens, 200);
        assert_eq!(stats.output_tokens, 80);
        assert_eq!(stats.cache_read_tokens, 50);
        assert_eq!(stats.cache_creation_tokens, 10);
        assert_eq!(stats.assistant_message_count, 1);
        assert_eq!(stats.model.as_deref(), Some("claude-opus-4-5"));
    }

    /// An assistant entry with tool_use blocks increments tool call counts.
    #[test]
    fn assistant_tool_use_counts() {
        let mut entry = make_entry("assistant");
        entry.message = Some(Message {
            role: Some("assistant".to_string()),
            content: Some(MessageContent::Blocks(vec![
                ContentBlock::ToolUse {
                    id: "t1".to_string(),
                    name: "Bash".to_string(),
                    input: serde_json::Value::Null,
                },
                ContentBlock::ToolUse {
                    id: "t2".to_string(),
                    name: "Read".to_string(),
                    input: serde_json::Value::Null,
                },
                ContentBlock::ToolUse {
                    id: "t3".to_string(),
                    name: "Bash".to_string(),
                    input: serde_json::Value::Null,
                },
            ])),
            usage: None,
            model: None,
            stop_reason: None,
        });

        let stats = compute_stats(&[entry]);
        assert_eq!(stats.tool_calls.get("Bash"), Some(&2));
        assert_eq!(stats.tool_calls.get("Read"), Some(&1));
    }

    /// Multiple entries with `duration_ms` are summed correctly.
    #[test]
    fn duration_summed_across_entries() {
        let mut e1 = make_entry("system");
        e1.duration_ms = Some(1000);
        let mut e2 = make_entry("system");
        e2.duration_ms = Some(2500);

        let stats = compute_stats(&[e1, e2]);
        assert_eq!(stats.duration_ms, 3500);
    }

    /// Only `user` entries that actually carry a message are counted.
    #[test]
    fn user_messages_counted_only_with_payload() {
        let mut with_message = make_entry("user");
        with_message.message = Some(Message {
            role: Some("user".to_string()),
            content: Some(MessageContent::Text("hi".to_string())),
            usage: None,
            model: None,
            stop_reason: None,
        });
        let without_message = make_entry("user");

        let stats = compute_stats(&[with_message, without_message]);
        assert_eq!(stats.user_message_count, 1);
        assert_eq!(stats.assistant_message_count, 0);
    }

    /// The model comes from the first assistant message that names one.
    #[test]
    fn model_taken_from_first_assistant_with_one() {
        let entries = [
            assistant_with_model(None),
            assistant_with_model(Some("model-a")),
            assistant_with_model(Some("model-b")),
        ];

        let stats = compute_stats(&entries);
        assert_eq!(stats.model.as_deref(), Some("model-a"));
        assert_eq!(stats.assistant_message_count, 3);
    }
}
|
||||||
Loading…
Reference in New Issue