From d6d7327c90c5b034a842d5a4c27f0bc73ca6274b Mon Sep 17 00:00:00 2001 From: Elijah Voigt Date: Tue, 10 Mar 2026 14:02:26 -0700 Subject: [PATCH] =?UTF-8?q?docs(edu):=20write=20=C2=A79=20generating=20emb?= =?UTF-8?q?eddings=20in=20Rust=20for=20vector-db=20course=20[4c961f]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- edu/src/vector-db.md | 74 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/edu/src/vector-db.md b/edu/src/vector-db.md index 50b117a..f9be761 100644 --- a/edu/src/vector-db.md +++ b/edu/src/vector-db.md @@ -822,7 +822,79 @@ async fn main() -> Result<(), Box> { ### 9. Generating Embeddings in Rust -Before you can search by meaning, you need a way to convert text into vectors. This section covers two approaches available in Rust: running a local embedding model with `fastembed-rs` (no API key, works offline, suited for smaller models) and calling an HTTP embedding API such as the OpenAI Embeddings endpoint (larger, higher-quality models at the cost of latency and a network dependency). 🚧 Full content tracked in [nbd:4c961f]. +Before you can search by meaning, you need a way to convert text into vectors. This section covers two approaches available in Rust: running a local embedding model with `fastembed-rs` (no API key, works offline, suited for smaller models) and calling an HTTP embedding API such as the OpenAI Embeddings endpoint (larger, higher-quality models at the cost of latency and a network dependency). + +**Option A — fastembed-rs (local, recommended for exercises).** The `fastembed` crate wraps ONNX Runtime and ships pre-trained sentence-transformer models. No API key is required, it works fully offline after the first run, inference is CPU-only, and results are deterministic — all properties that make it ideal for the exercises in §10–§12. 
Add it to your project: + +```toml +fastembed = "4" +``` + +The default model is BGE-Small-EN-v1.5, which produces 384-dimensional vectors. On first use, the model weights (~130 MB) are downloaded to a local cache directory — by default `.fastembed_cache/` under the current working directory, configurable with `with_cache_dir(...)` — and reused from there on subsequent runs. Here is the minimal code to embed two strings: + +```rust +use fastembed::{TextEmbedding, InitOptions, EmbeddingModel}; + +let model = TextEmbedding::try_new( + InitOptions::new(EmbeddingModel::BGESmallENV15) + .with_show_download_progress(true), +)?; + +let docs = vec!["hello world", "Rust is fast"]; +let embeddings: Vec<Vec<f32>> = model.embed(docs, None)?; +// embeddings[0].len() == 384 +``` + +**Batch embedding matters.** Passing multiple strings in a single `model.embed()` call is significantly more efficient than embedding one string at a time, because the runtime can batch tensor operations. Always collect your corpus into a `Vec` and embed it in one shot rather than looping with individual calls. + +**Option B — HTTP API (OpenAI-compatible).** When you need a specific production-grade model — or your deployment already relies on an external embeddings service — you can call an OpenAI-compatible endpoint instead. 
You will need three additional crates (the example also uses `anyhow` for its error type; add `anyhow = "1"` if your project does not already depend on it): + +```toml +reqwest = { version = "0.12", features = ["json"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" +``` + +Define request and response types that match the API schema: + +```rust +#[derive(serde::Serialize)] +struct EmbedRequest { + model: String, + input: Vec<String>, +} + +#[derive(serde::Deserialize)] +struct EmbedResponse { + data: Vec<EmbedData>, +} + +#[derive(serde::Deserialize)] +struct EmbedData { + embedding: Vec<f32>, +} + +async fn embed_texts(texts: Vec<String>) -> anyhow::Result<Vec<Vec<f32>>> { + let api_key = std::env::var("OPENAI_API_KEY")?; + let client = reqwest::Client::new(); + let res: EmbedResponse = client + .post("https://api.openai.com/v1/embeddings") + .bearer_auth(&api_key) + .json(&EmbedRequest { + model: "text-embedding-3-small".into(), + input: texts, + }) + .send() + .await? + .json() + .await?; + Ok(res.data.into_iter().map(|d| d.embedding).collect()) +} +``` + +**Choosing between them.** For the remaining exercises in this course (§10–§12), use `fastembed`. It requires no API key, has no network dependency, and produces deterministic results — which means your assertions will be stable across runs. Inference is sub-100 ms per batch on a modern CPU, more than fast enough for the dataset sizes used here. Reach for the HTTP approach when you need a specific production-grade model, when your application already communicates with an embeddings service, or when you need multilingual support beyond what the local models offer. + +**Dimensionality note.** The `F32_BLOB(d)` column type you define in your schema must match the model's output dimension exactly — you cannot mix dimensions within a single column. The toy examples in §6–§8 used `F32_BLOB(3)` for hand-written 3-D vectors. Now that you are working with real models, change that declaration to `F32_BLOB(384)` for BGE-Small-EN-v1.5 or all-MiniLM-L6-v2, `F32_BLOB(768)` for BGE-Base-EN-v1.5, or `F32_BLOB(1536)` for OpenAI's text-embedding-3-small. 
If you change the dimension of an existing column, you must drop and recreate both the column and its associated vector index — an existing vector index cannot be reused for vectors of a different dimension. ---