From d6d7327c90c5b034a842d5a4c27f0bc73ca6274b Mon Sep 17 00:00:00 2001
From: Elijah Voigt <elijah.caine.mv@gmail.com>
Date: Tue, 10 Mar 2026 14:02:26 -0700
Subject: [PATCH] =?UTF-8?q?docs(edu):=20write=20=C2=A79=20generating=20emb?=
 =?UTF-8?q?eddings=20in=20Rust=20for=20vector-db=20course=20[4c961f]?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 edu/src/vector-db.md | 74 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 73 insertions(+), 1 deletion(-)
diff --git a/edu/src/vector-db.md b/edu/src/vector-db.md
index 50b117a..f9be761 100644
--- a/edu/src/vector-db.md
+++ b/edu/src/vector-db.md
@@ -822,7 +822,79 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 
 ### 9. Generating Embeddings in Rust
 
-Before you can search by meaning, you need a way to convert text into vectors. This section covers two approaches available in Rust: running a local embedding model with `fastembed-rs` (no API key, works offline, suited for smaller models) and calling an HTTP embedding API such as the OpenAI Embeddings endpoint (larger, higher-quality models at the cost of latency and a network dependency). 🚧 Full content tracked in [nbd:4c961f].
+Before you can search by meaning, you need a way to convert text into vectors. This section covers two approaches available in Rust: running a local embedding model with `fastembed-rs` (no API key, works offline, suited for smaller models) and calling an HTTP embedding API such as the OpenAI Embeddings endpoint (larger, higher-quality models at the cost of latency and a network dependency).
+
+**Option A — fastembed-rs (local, recommended for exercises).** The `fastembed` crate wraps ONNX Runtime and ships pre-trained sentence-transformer models. No API key is required, it works fully offline after the first run, inference is CPU-only, and results are deterministic — all properties that make it ideal for the exercises in §10–§12. Add it to your project:
+
+```toml
+fastembed = "4"
+```
+
+The default model is BGE-Small-EN-v1.5, which produces 384-dimensional vectors. On first use, the model weights (~130 MB) are downloaded to a `.fastembed_cache/` directory in the current working directory (use `InitOptions::with_cache_dir` to choose a different location) and reused from there on subsequent runs. Here is the minimal code to embed two strings:
+
+```rust
+use fastembed::{TextEmbedding, InitOptions, EmbeddingModel};
+
+let model = TextEmbedding::try_new(
+    InitOptions::new(EmbeddingModel::BGESmallENV15)
+        .with_show_download_progress(true),
+)?;
+
+let docs = vec!["hello world", "Rust is fast"];
+let embeddings: Vec<Vec<f32>> = model.embed(docs, None)?;
+// embeddings[0].len() == 384
+```
+
+**Batch embedding matters.** Passing multiple strings in a single `model.embed()` call is significantly more efficient than embedding one string at a time, because the runtime can batch tensor operations. The second argument is an optional batch size; passing `None` uses the crate's default of 256. Always collect your corpus into a `Vec` and embed it in one shot rather than looping with individual calls.
+
+**Option B — HTTP API (OpenAI-compatible).** When you need a specific production-grade model — or your deployment already relies on an external embeddings service — you can call an OpenAI-compatible endpoint instead. You will need three additional crates (the example below also uses `anyhow` for error handling; add `anyhow = "1"` if your project does not already depend on it):
+
+```toml
+reqwest = { version = "0.12", features = ["json"] }
+serde = { version = "1", features = ["derive"] }
+serde_json = "1"
+```
+
+Define request and response types that match the API schema:
+
+```rust
+#[derive(serde::Serialize)]
+struct EmbedRequest {
+    model: String,
+    input: Vec<String>,
+}
+
+#[derive(serde::Deserialize)]
+struct EmbedResponse {
+    data: Vec<EmbedData>,
+}
+
+#[derive(serde::Deserialize)]
+struct EmbedData {
+    embedding: Vec<f32>,
+}
+
+async fn embed_texts(texts: Vec<String>) -> anyhow::Result<Vec<Vec<f32>>> {
+    let api_key = std::env::var("OPENAI_API_KEY")?;
+    let client = reqwest::Client::new();
+    let res: EmbedResponse = client
+        .post("https://api.openai.com/v1/embeddings")
+        .bearer_auth(&api_key)
+        .json(&EmbedRequest {
+            model: "text-embedding-3-small".into(),
+            input: texts,
+        })
+        .send()
+        .await?
+        .json()
+        .await?;
+    Ok(res.data.into_iter().map(|d| d.embedding).collect())
+}
+```
+
+**Choosing between them.** For the remaining exercises in this course (§10–§12), use `fastembed`. It requires no API key, has no network dependency, and produces deterministic results — which means your assertions will be stable across runs. Inference is sub-100 ms per batch on a modern CPU, more than fast enough for the dataset sizes used here. Reach for the HTTP approach when you need a specific production-grade model, when your application already communicates with an embeddings service, or when you need multilingual support beyond what the local models offer.
+
+**Dimensionality note.** The `F32_BLOB(d)` column type you define in your schema must match the model's output dimension exactly — you cannot mix dimensions within a single column. The toy examples in §6–§8 used `F32_BLOB(3)` for hand-written 3-D vectors. Now that you are working with real models, change that declaration to `F32_BLOB(384)` for BGE-Small-EN-v1.5 (all-MiniLM-L6-v2 is also 384-dimensional), `F32_BLOB(768)` for BGE-Base-EN-v1.5, or `F32_BLOB(1536)` for OpenAI's text-embedding-3-small. If you change the dimension of an existing column, you must drop and recreate both the column and its associated vector index — sqlite-vec cannot reindex vectors whose dimensions have changed.
 
 ---