From 0462586d88f2c1c110c81823518586f64171b483 Mon Sep 17 00:00:00 2001 From: Elijah Voigt Date: Tue, 10 Mar 2026 13:59:24 -0700 Subject: [PATCH] =?UTF-8?q?docs(edu):=20write=20=C2=A77=20exercise=201=20s?= =?UTF-8?q?toring=20vectors=20for=20vector-db=20course=20[081a55]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- edu/src/vector-db.md | 224 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 223 insertions(+), 1 deletion(-) diff --git a/edu/src/vector-db.md b/edu/src/vector-db.md index 5f58f55..23a9263 100644 --- a/edu/src/vector-db.md +++ b/edu/src/vector-db.md @@ -349,7 +349,229 @@ You now have a working local vector database. Exercises 1 through 5 build on thi ### 7. Exercise 1 — Storing and Retrieving Vectors -**Goal:** Insert a small set of labelled vectors into the `items` table created in §6, then retrieve them with a `SELECT` and deserialize the stored blob back into a Rust `Vec<f32>`. 🚧 Full content tracked in [nbd:081a55]. +**Goal:** Insert 6 labelled 3-dimensional vectors into the `items` table created in §6, then `SELECT` all rows and print each label alongside its deserialized `Vec<f32>`. + +#### The Dataset + +We use a tiny hand-crafted set of 3D vectors so the results are easy to verify by inspection. The vectors are designed so that items in the same category cluster together — animals near `[high, low, low]`, vehicles near `[low, high, low]`, and programming languages near `[low, low, high]`: + +| id | label | embedding | +|---|---|---| +| 1 | "cat" | [0.9, 0.1, 0.2] | +| 2 | "dog" | [0.8, 0.2, 0.3] | +| 3 | "car" | [0.1, 0.9, 0.1] | +| 4 | "truck" | [0.2, 0.8, 0.2] | +| 5 | "python" | [0.15, 0.1, 0.95] | +| 6 | "rust" | [0.1, 0.05, 0.9] | + +In later exercises you will query these vectors to see how cosine distance naturally separates the three clusters. 
+ +#### Step 1 — Formatting a Vector for INSERT + +sqlite-vec's `vector(?)` SQL function accepts a **JSON array string** — for example `"[0.9,0.1,0.2]"`. You pass this string as a text parameter and `vector()` converts it into the internal `F32_BLOB` format for storage. + +A small helper keeps the conversion in one place: + +```rust +fn vec_to_json(v: &[f32]) -> String { + format!("[{}]", v.iter().map(|x| x.to_string()).collect::<Vec<_>>().join(",")) +} +``` + +Calling `vec_to_json(&[0.9, 0.1, 0.2])` returns the string `"[0.9,0.1,0.2]"`, ready to bind as a SQL parameter. + +#### Step 2 — Inserting Rows + +Use `INSERT OR IGNORE` so the program is **idempotent** — running it twice does not produce duplicate-key errors or duplicate data: + +```sql +INSERT OR IGNORE INTO items (id, label, embedding) VALUES (?, ?, vector(?)) +``` + +Define the dataset as a `Vec<(i64, &str, Vec<f32>)>` and loop over it: + +```rust +let data: Vec<(i64, &str, Vec<f32>)> = vec![ + (1, "cat", vec![0.9, 0.1, 0.2]), + (2, "dog", vec![0.8, 0.2, 0.3]), + (3, "car", vec![0.1, 0.9, 0.1]), + (4, "truck", vec![0.2, 0.8, 0.2]), + (5, "python", vec![0.15, 0.1, 0.95]), + (6, "rust", vec![0.1, 0.05, 0.9]), +]; + +for (id, label, embedding) in &data { + conn.execute( + "INSERT OR IGNORE INTO items (id, label, embedding) VALUES (?, ?, vector(?))", + libsql::params![*id, *label, vec_to_json(embedding)], + ).await?; +} +println!("Inserted {} rows.", data.len()); +``` + +#### Step 3 — Selecting and Deserializing + +Query all rows back out. 
The `vector_extract` function converts the stored `F32_BLOB` back into a JSON array string that you can parse in Rust: + +```sql +SELECT id, label, vector_extract(embedding) FROM items ORDER BY id +``` + +Add `serde_json` to your `Cargo.toml` dependencies for JSON parsing: + +```toml +serde_json = "1" +``` + +Then fetch and deserialize: + +```rust +let mut rows = conn + .query("SELECT id, label, vector_extract(embedding) FROM items ORDER BY id", ()) + .await?; + +while let Some(row) = rows.next().await? { + let id: i64 = row.get(0)?; + let label: String = row.get(1)?; + let json_str: String = row.get(2)?; + let embedding: Vec<f32> = serde_json::from_str(&json_str)?; + println!("{id:<3}{label:<10}{embedding:?}"); +} +``` + +#### Step 4 — Expected Output + +Running `cargo run` should print: + +``` +SQLite version: 3.46.0 +Database ready. +Inserted 6 rows. +1 cat [0.9, 0.1, 0.2] +2 dog [0.8, 0.2, 0.3] +3 car [0.1, 0.9, 0.1] +4 truck [0.2, 0.8, 0.2] +5 python [0.15, 0.1, 0.95] +6 rust [0.1, 0.05, 0.9] +``` + +Every vector round-trips through the database intact: Rust `Vec<f32>` → JSON string → `vector()` → `F32_BLOB` storage → `vector_extract()` → JSON string → `serde_json` → Rust `Vec<f32>`. + +#### Cargo.toml Additions + +Your full `[dependencies]` section should now be: + +```toml +[dependencies] +libsql = "0.9" +tokio = { version = "1", features = ["full"] } +serde_json = "1" +``` + +#### Reference Solution + +
<details><summary>Show full solution</summary> + +**`Cargo.toml`** (dependencies only): + +```toml +[dependencies] +libsql = "0.9" +tokio = { version = "1", features = ["full"] } +serde_json = "1" +``` + +**`src/main.rs`**: + +```rust +use libsql::{Builder, Database}; + +/// Convert a float slice to a JSON array string for sqlite-vec's `vector()` function. +fn vec_to_json(v: &[f32]) -> String { + format!( + "[{}]", + v.iter() + .map(|x| x.to_string()) + .collect::<Vec<_>>() + .join(",") + ) +} + +#[tokio::main] +async fn main() -> Result<(), Box<dyn std::error::Error>> { + // --- Open database --- + let db: Database = Builder::new_local("vectors.db").build().await?; + let conn = db.connect()?; + + // Verify connection + let mut rows = conn.query("SELECT sqlite_version()", ()).await?; + if let Some(row) = rows.next().await? { + let version: String = row.get(0)?; + println!("SQLite version: {version}"); + } + + // --- Create table (from §6) --- + conn.execute( + "CREATE TABLE IF NOT EXISTS items ( + id INTEGER PRIMARY KEY, + label TEXT NOT NULL, + embedding F32_BLOB(3) NOT NULL + )", + (), + ) + .await?; + + // --- Create HNSW index (from §6) --- + conn.execute( + "CREATE INDEX IF NOT EXISTS items_vec_idx + ON items (embedding) + USING libsql_vector_idx(embedding)", + (), + ) + .await?; + + println!("Database ready."); + + // --- Insert 6 labelled vectors --- + let data: Vec<(i64, &str, Vec<f32>)> = vec![ + (1, "cat", vec![0.9, 0.1, 0.2]), + (2, "dog", vec![0.8, 0.2, 0.3]), + (3, "car", vec![0.1, 0.9, 0.1]), + (4, "truck", vec![0.2, 0.8, 0.2]), + (5, "python", vec![0.15, 0.1, 0.95]), + (6, "rust", vec![0.1, 0.05, 0.9]), + ]; + + for (id, label, embedding) in &data { + conn.execute( + "INSERT OR IGNORE INTO items (id, label, embedding) VALUES (?, ?, vector(?))", + libsql::params![*id, *label, vec_to_json(embedding)], + ) + .await?; + } + println!("Inserted {} rows.", data.len()); + + // --- Select and deserialize --- + let mut rows = conn + .query( + "SELECT id, label, vector_extract(embedding) FROM items ORDER BY id", + (), 
) + .await?; + + while let Some(row) = rows.next().await? { + let id: i64 = row.get(0)?; + let label: String = row.get(1)?; + let json_str: String = row.get(2)?; + let embedding: Vec<f32> = serde_json::from_str(&json_str)?; + println!("{id:<3}{label:<10}{embedding:?}"); + } + + Ok(()) +} +``` + +</details>
---