diff --git a/.beans/claudbg-76fy--implement-jsonl-session-file-reader.md b/.beans/claudbg-76fy--implement-jsonl-session-file-reader.md index 00ad2f4..6fb0c2f 100644 --- a/.beans/claudbg-76fy--implement-jsonl-session-file-reader.md +++ b/.beans/claudbg-76fy--implement-jsonl-session-file-reader.md @@ -1,10 +1,11 @@ --- # claudbg-76fy title: Implement JSONL session file reader -status: todo +status: in-progress type: task +priority: normal created_at: 2026-03-27T19:39:15Z -updated_at: 2026-03-27T19:39:15Z +updated_at: 2026-03-28T05:45:28Z parent: claudbg-mztt --- diff --git a/.beans/claudbg-g5uv--session-file-discovery.md b/.beans/claudbg-g5uv--session-file-discovery.md index 6107d59..9dbbe03 100644 --- a/.beans/claudbg-g5uv--session-file-discovery.md +++ b/.beans/claudbg-g5uv--session-file-discovery.md @@ -1,10 +1,11 @@ --- # claudbg-g5uv title: Session file discovery -status: todo +status: in-progress type: task +priority: normal created_at: 2026-03-27T19:39:15Z -updated_at: 2026-03-27T19:39:15Z +updated_at: 2026-03-28T05:43:18Z parent: claudbg-mztt --- diff --git a/.beans/claudbg-jupi--sub-agent-file-discovery.md b/.beans/claudbg-jupi--sub-agent-file-discovery.md index 30e9158..91e7796 100644 --- a/.beans/claudbg-jupi--sub-agent-file-discovery.md +++ b/.beans/claudbg-jupi--sub-agent-file-discovery.md @@ -1,10 +1,11 @@ --- # claudbg-jupi title: Sub-agent file discovery -status: todo +status: in-progress type: task +priority: normal created_at: 2026-03-27T19:39:15Z -updated_at: 2026-03-27T19:39:15Z +updated_at: 2026-03-28T05:45:22Z parent: claudbg-mztt --- diff --git a/src/lib.rs b/src/lib.rs index 9ca3cd8..78e6d57 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,6 +6,7 @@ pub mod cli; pub mod error; pub mod models; // pub mod output; +pub mod parser; pub mod util; #[cfg(test)] diff --git a/src/parser/discovery.rs b/src/parser/discovery.rs new file mode 100644 index 0000000..ada1d34 --- /dev/null +++ b/src/parser/discovery.rs @@ -0,0 +1,451 @@ +//! Discovery of Claude Code session and sub-agent JSONL files on disk. + +use std::io::{BufRead, BufReader}; +use std::path::{Path, PathBuf}; + +use chrono::{DateTime, Utc}; + +use crate::models::session::RawEntry; + +/// Reference to a discovered session file on disk. +#[derive(Debug, Clone)] +pub struct SessionRef { + /// The session UUID (filename stem, without `.jsonl`). + pub session_id: String, + /// The project path recovered from the JSONL `cwd` field. + /// `None` if the file could not be parsed to find a `cwd`. + pub project_path: Option, + /// Absolute path to the `.jsonl` file. + pub file_path: PathBuf, + /// Last-modified time of the file. + pub modified_at: DateTime, +} + +/// Reference to a discovered sub-agent JSONL file. +#[derive(Debug, Clone)] +pub struct AgentRef { + /// The agent's UUID (from filename: `agent-{uuid}.jsonl`). + pub agent_id: String, + /// The parent session UUID (read from first line of the agent JSONL). + pub session_id: String, + /// Agent type, from `agent-{id}.meta.json` if present. + pub agent_type: Option, + /// Absolute path to the agent's `.jsonl` file. + pub file_path: PathBuf, + /// Last-modified time of the file. + pub modified_at: DateTime, +} + +/// Resolve the `~/.claude/projects/` directory path. +/// +/// Uses `HOME` environment variable to expand `~`. +fn claude_projects_dir() -> Option { + let home = std::env::var("HOME").ok()?; + Some(PathBuf::from(home).join(".claude").join("projects")) +} + +/// Read the first non-empty line of a file and attempt to extract `cwd` from it. +/// +/// Returns `None` if the file cannot be read or the first line cannot be parsed. +fn read_cwd_from_first_line(path: &Path) -> Option { + let file = std::fs::File::open(path).ok()?; + let reader = BufReader::new(file); + for line in reader.lines() { + let line = line.ok()?; + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + if let Ok(entry) = serde_json::from_str::(trimmed) + && entry.cwd.is_some() + { + return entry.cwd; + } + // Return after first non-empty line regardless. + break; + } + None +} + +/// Discover all session JSONL files under `~/.claude/projects/`. +/// +/// Walks one level deep: for each project subdirectory, collects `*.jsonl` files +/// while skipping `subagents/` subdirectories. For each file the session ID is +/// extracted from the filename stem and the project path from the first parseable +/// JSONL line's `cwd` field. +/// +/// Files that cannot be read or stat'd are skipped with a warning to `stderr`. +pub fn discover_sessions() -> crate::error::Result> { + let projects_dir = match claude_projects_dir() { + Some(d) => d, + None => { + eprintln!("claudbg: could not determine HOME directory"); + return Ok(vec![]); + } + }; + + if !projects_dir.exists() { + return Ok(vec![]); + } + + let mut sessions = Vec::new(); + + let project_entries = match std::fs::read_dir(&projects_dir) { + Ok(e) => e, + Err(err) => { + eprintln!( + "claudbg: could not read {}: {}", + projects_dir.display(), + err + ); + return Ok(vec![]); + } + }; + + for proj_entry in project_entries { + let proj_entry = match proj_entry { + Ok(e) => e, + Err(err) => { + eprintln!("claudbg: error reading project dir entry: {err}"); + continue; + } + }; + + let proj_path = proj_entry.path(); + if !proj_path.is_dir() { + continue; + } + + let file_entries = match std::fs::read_dir(&proj_path) { + Ok(e) => e, + Err(err) => { + eprintln!("claudbg: could not read {}: {}", proj_path.display(), err); + continue; + } + }; + + for file_entry in file_entries { + let file_entry = match file_entry { + Ok(e) => e, + Err(err) => { + eprintln!("claudbg: error reading file entry: {err}"); + continue; + } + }; + + let file_path = file_entry.path(); + + // Skip subdirectories (e.g. subagents/). + if file_path.is_dir() { + continue; + } + + // Only process .jsonl files. + if file_path.extension().and_then(|e| e.to_str()) != Some("jsonl") { + continue; + } + + // Extract session_id from filename stem. + let session_id = match file_path.file_stem().and_then(|s| s.to_str()) { + Some(s) => s.to_string(), + None => { + eprintln!( + "claudbg: could not extract session ID from {}", + file_path.display() + ); + continue; + } + }; + + // Get last-modified time. + let modified_at = match file_entry.metadata() { + Ok(meta) => match meta.modified() { + Ok(t) => DateTime::::from(t), + Err(err) => { + eprintln!( + "claudbg: could not read mtime for {}: {}", + file_path.display(), + err + ); + continue; + } + }, + Err(err) => { + eprintln!("claudbg: could not stat {}: {}", file_path.display(), err); + continue; + } + }; + + // Try to extract cwd from first JSONL line. + let project_path = read_cwd_from_first_line(&file_path); + + sessions.push(SessionRef { + session_id, + project_path, + file_path, + modified_at, + }); + } + } + + Ok(sessions) +} + +/// Discover all sub-agent runs for a session given the session's JSONL file path. +/// +/// Looks for `subagents/agent-*.jsonl` files in the same directory as +/// `session_file`. +pub fn discover_agents_for_session(session_file: &Path) -> crate::error::Result> { + let parent = match session_file.parent() { + Some(p) => p, + None => return Ok(vec![]), + }; + let subagents_dir = parent.join("subagents"); + collect_agents_in_dir(&subagents_dir, None) +} + +/// Discover all sub-agent runs across all sessions. +/// +/// Walks `~/.claude/projects/` and collects `agent-*.jsonl` files from every +/// `subagents/` subdirectory found. +pub fn discover_all_agents() -> crate::error::Result> { + let projects_dir = match claude_projects_dir() { + Some(d) => d, + None => { + eprintln!("claudbg: could not determine HOME directory"); + return Ok(vec![]); + } + }; + + if !projects_dir.exists() { + return Ok(vec![]); + } + + let mut agents = Vec::new(); + + let project_entries = match std::fs::read_dir(&projects_dir) { + Ok(e) => e, + Err(err) => { + eprintln!( + "claudbg: could not read {}: {}", + projects_dir.display(), + err + ); + return Ok(vec![]); + } + }; + + for proj_entry in project_entries { + let proj_entry = match proj_entry { + Ok(e) => e, + Err(err) => { + eprintln!("claudbg: error reading project dir entry: {err}"); + continue; + } + }; + + let proj_path = proj_entry.path(); + if !proj_path.is_dir() { + continue; + } + + let subagents_dir = proj_path.join("subagents"); + match collect_agents_in_dir(&subagents_dir, None) { + Ok(mut found) => agents.append(&mut found), + Err(err) => { + eprintln!( + "claudbg: error collecting agents in {}: {}", + subagents_dir.display(), + err + ); + } + } + } + + Ok(agents) +} + +/// Collect all `agent-*.jsonl` files from a single `subagents/` directory. +/// +/// `filter_session_id` optionally restricts results to agents whose first JSONL +/// line's `session_id` field matches the given value. +fn collect_agents_in_dir( + subagents_dir: &Path, + filter_session_id: Option<&str>, +) -> crate::error::Result> { + if !subagents_dir.exists() { + return Ok(vec![]); + } + + let mut agents = Vec::new(); + + let entries = match std::fs::read_dir(subagents_dir) { + Ok(e) => e, + Err(err) => { + eprintln!( + "claudbg: could not read {}: {}", + subagents_dir.display(), + err + ); + return Ok(vec![]); + } + }; + + for entry in entries { + let entry = match entry { + Ok(e) => e, + Err(err) => { + eprintln!("claudbg: error reading subagent entry: {err}"); + continue; + } + }; + + let file_path = entry.path(); + if file_path.is_dir() { + continue; + } + + // Only process agent-*.jsonl files. + let file_name = match file_path.file_name().and_then(|n| n.to_str()) { + Some(n) => n.to_string(), + None => continue, + }; + + if !file_name.starts_with("agent-") || !file_name.ends_with(".jsonl") { + continue; + } + + // Extract agent_id: strip "agent-" prefix and ".jsonl" suffix. + let agent_id = file_name + .strip_prefix("agent-") + .and_then(|s| s.strip_suffix(".jsonl")) + .unwrap_or(&file_name) + .to_string(); + + // Get last-modified time. + let modified_at = match entry.metadata() { + Ok(meta) => match meta.modified() { + Ok(t) => DateTime::::from(t), + Err(err) => { + eprintln!( + "claudbg: could not read mtime for {}: {}", + file_path.display(), + err + ); + continue; + } + }, + Err(err) => { + eprintln!("claudbg: could not stat {}: {}", file_path.display(), err); + continue; + } + }; + + // Read session_id from first line of the agent file. + let session_id = read_session_id_from_first_line(&file_path).unwrap_or_default(); + + // Filter by session_id if requested. + if let Some(filter) = filter_session_id + && session_id != filter + { + continue; + } + + // Try to read agent_type from meta.json. + let meta_path = subagents_dir.join(format!("agent-{agent_id}.meta.json")); + let agent_type = read_agent_type_from_meta(&meta_path); + + agents.push(AgentRef { + agent_id, + session_id, + agent_type, + file_path, + modified_at, + }); + } + + Ok(agents) +} + +/// Read the `session_id` from the first non-empty line of an agent JSONL file. +fn read_session_id_from_first_line(path: &Path) -> Option { + let file = std::fs::File::open(path).ok()?; + let reader = BufReader::new(file); + for line in reader.lines() { + let line = line.ok()?; + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + if let Ok(entry) = serde_json::from_str::(trimmed) + && entry.session_id.is_some() + { + return entry.session_id; + } + break; + } + None +} + +/// Try to read `agent_type` from an `agent-{id}.meta.json` file. +/// +/// Expects a JSON object with an `"agent_type"` string field. +fn read_agent_type_from_meta(meta_path: &Path) -> Option { + let content = std::fs::read_to_string(meta_path).ok()?; + let value: serde_json::Value = serde_json::from_str(&content).ok()?; + value + .get("agent_type") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + + /// `SessionRef` implements `Debug` and `Clone`. + #[test] + fn session_ref_debug_clone() { + let sr = SessionRef { + session_id: "abc123".to_string(), + project_path: Some("/home/user/project".to_string()), + file_path: PathBuf::from("/tmp/abc123.jsonl"), + modified_at: DateTime::::from(std::time::SystemTime::UNIX_EPOCH), + }; + let cloned = sr.clone(); + assert_eq!(cloned.session_id, sr.session_id); + // Ensure Debug works without panic. + let _ = format!("{sr:?}"); + } + + /// `AgentRef` implements `Debug` and `Clone`. + #[test] + fn agent_ref_debug_clone() { + let ar = AgentRef { + agent_id: "def456".to_string(), + session_id: "abc123".to_string(), + agent_type: Some("TaskAgent".to_string()), + file_path: PathBuf::from("/tmp/agent-def456.jsonl"), + modified_at: DateTime::::from(std::time::SystemTime::UNIX_EPOCH), + }; + let cloned = ar.clone(); + assert_eq!(cloned.agent_id, ar.agent_id); + let _ = format!("{ar:?}"); + } + + /// `discover_sessions()` does not panic and returns `Ok` even if + /// `~/.claude/projects/` does not exist. + #[test] + fn discover_sessions_does_not_panic() { + let result = discover_sessions(); + // Should always succeed (empty or non-empty). + assert!(result.is_ok()); + } + + /// `discover_all_agents()` does not panic and returns `Ok`. + #[test] + fn discover_all_agents_does_not_panic() { + let result = discover_all_agents(); + assert!(result.is_ok()); + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs new file mode 100644 index 0000000..e262ba8 --- /dev/null +++ b/src/parser/mod.rs @@ -0,0 +1,3 @@ +//! JSONL session file discovery and reading. +pub mod discovery; +pub mod reader; diff --git a/src/parser/reader.rs b/src/parser/reader.rs new file mode 100644 index 0000000..7af387d --- /dev/null +++ b/src/parser/reader.rs @@ -0,0 +1,115 @@ +//! Async JSONL session file reader. + +use std::path::Path; + +use crate::error::Result; +use crate::models::session::RawEntry; + +/// Reads a JSONL session file and returns all successfully-parsed entries. +/// +/// - Skips empty lines. +/// - Skips lines that fail to parse (logs a warning with `eprintln!`). +/// - Uses tokio async I/O with `BufReader` for efficiency. +/// - If the file path does not exist, returns `Err(AppError::Io(...))`. +pub async fn read_session_file(path: &Path) -> Result> { + use tokio::io::AsyncBufReadExt; + + let file = tokio::fs::File::open(path) + .await + .map_err(crate::error::AppError::Io)?; + let reader = tokio::io::BufReader::new(file); + let mut lines = reader.lines(); + + let mut entries = Vec::new(); + + loop { + let line = lines + .next_line() + .await + .map_err(crate::error::AppError::Io)?; + match line { + None => break, + Some(line) => { + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + match serde_json::from_str::(trimmed) { + Ok(entry) => entries.push(entry), + Err(err) => { + eprintln!("claudbg: skipping unparseable JSONL line: {err}"); + } + } + } + } + } + + Ok(entries) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + + /// Helper: create a temp file with the given content, run the async reader. + async fn read_temp(content: &str) -> Result> { + let dir = std::env::temp_dir(); + let path = dir.join(format!( + "claudbg_test_{}.jsonl", + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .subsec_nanos() + )); + { + let mut f = std::fs::File::create(&path).expect("create temp file"); + f.write_all(content.as_bytes()).expect("write temp file"); + } + let result = read_session_file(&path).await; + let _ = std::fs::remove_file(&path); + result + } + + /// Two valid JSON lines → two entries returned. + #[tokio::test] + async fn two_valid_lines() { + let content = concat!( + r#"{"type":"user","session_id":"abc"}"#, + "\n", + r#"{"type":"assistant","session_id":"abc"}"#, + "\n" + ); + let entries = read_temp(content).await.expect("should succeed"); + assert_eq!(entries.len(), 2); + } + + /// One valid + one invalid JSON line → one entry returned (invalid skipped). + #[tokio::test] + async fn one_valid_one_invalid() { + let content = concat!( + r#"{"type":"user","session_id":"abc"}"#, + "\n", + "THIS IS NOT JSON\n" + ); + let entries = read_temp(content).await.expect("should succeed"); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].entry_type.as_deref(), Some("user")); + } + + /// Empty file → empty vec returned. + #[tokio::test] + async fn empty_file_returns_empty_vec() { + let entries = read_temp("").await.expect("should succeed"); + assert!(entries.is_empty()); + } + + /// Nonexistent path → returns `Err`. + #[tokio::test] + async fn nonexistent_path_returns_err() { + let path = std::path::PathBuf::from("/tmp/claudbg_nonexistent_8675309.jsonl"); + let result = read_session_file(&path).await; + assert!(result.is_err()); + assert!(matches!(result, Err(crate::error::AppError::Io(_)))); + } +}