This vignette shows a complete DuckDB workflow for RAGFlowChainR:

- Create a local vector store
- Insert text chunks and build indexes
- Query with `create_rag_chain()`

## 2) Build a small local DuckDB vector store
# Tiny deterministic embedder for demos (8 dimensions).
#
# Maps each string to a fixed-length numeric vector derived from its
# character codes, so identical texts always get identical embeddings,
# regardless of batch size or position, and the caller's RNG state is
# never touched. (The previous version called set.seed(42) — a global
# side effect — and its runif() output depended only on length(x), not
# on the text, so the "deterministic" embeddings were not stable per
# document.)
#
# @param x A character vector of texts, or a data.frame with a
#   `page_content` column (in which case an `embedding` matrix column
#   is added and the data.frame is returned).
# @param embedding_dim Width of each embedding vector (default 8).
# @return A length(x) x embedding_dim numeric matrix with values in
#   [0, 1), or the input data.frame with an `embedding` column attached.
toy_embed <- function(x, embedding_dim = 8) {
  # data.frame path: embed the page_content column and attach it.
  if (is.data.frame(x) && "page_content" %in% names(x)) {
    x$embedding <- toy_embed(x$page_content, embedding_dim = embedding_dim)
    return(x)
  }
  x <- as.character(x)
  if (length(x) == 0) {
    return(matrix(numeric(0), nrow = 0, ncol = embedding_dim))
  }
  # Deterministic per-string features: position-weighted character-code
  # sums folded into [0, 1) via a prime modulus.
  embed_one <- function(s) {
    codes <- if (is.na(s)) 0L else utf8ToInt(s)
    if (length(codes) == 0) codes <- 0L  # empty string still embeds
    vapply(
      seq_len(embedding_dim),
      function(d) sum(codes * (seq_along(codes) + d)) %% 1009 / 1009,
      numeric(1)
    )
  }
  # vapply yields an embedding_dim x n array (column-major); byrow = TRUE
  # reshapes it into one row per input string, including embedding_dim = 1.
  matrix(
    vapply(x, embed_one, numeric(embedding_dim), USE.NAMES = FALSE),
    nrow = length(x),
    ncol = embedding_dim,
    byrow = TRUE
  )
}
# Create a fresh on-disk DuckDB vector store in a temporary file.
# embedding_dim = 8 must match the width produced by the embedder used
# at insert and query time.
# NOTE(review): overwrite = TRUE presumably replaces any existing
# database at db_path — confirm against create_vectorstore() docs.
db_path <- tempfile(fileext = ".duckdb")
con <- create_vectorstore(
db_path = db_path,
overwrite = TRUE,
embedding_dim = 8
)
# Three tiny example documents to index in the vector store.
corpus_texts <- c(
  "R is a language for statistics and data science.",
  "DuckDB is an in-process analytical database.",
  "RAG combines retrieval and generation for grounded answers."
)
docs <- data.frame(page_content = corpus_texts, stringsAsFactors = FALSE)
# Embed and insert the documents, then index them for retrieval.
# NOTE(review): insert_vectors() presumably applies embed_fun to the
# data.frame's page_content column — confirm against RAGFlowChainR docs.
# embedding_dim = 8 must match both the embedder output and the value
# the store was created with.
insert_vectors(
con = con,
df = docs,
embed_fun = toy_embed,
embedding_dim = 8
)
# Build the vector index on the populated table so similarity queries
# are efficient.
build_vector_index(con)
DBI::dbDisconnect(con)

## 3) Create a DuckDB RAG chain
# Stand-in LLM for the vignette: ignores the prompt and returns a
# canned answer, so the example runs without network access or API keys.
mock_llm <- function(prompt) "This is a mock answer from the LLM."
# Wire the vector store into a RAG chain. method = "DuckDB" selects the
# local DuckDB backend; embedding_function and embedding_dim must match
# the values used when the vectors were inserted (toy_embed, 8).
# use_web_search = FALSE keeps the example fully offline.
rag_chain <- create_rag_chain(
llm = mock_llm,
vector_database_directory = db_path,
method = "DuckDB",
embedding_function = toy_embed,
embedding_dim = 8,
use_web_search = FALSE
)

## 5) Cleanup
rag_chain$disconnect()
unlink(db_path)

## Notes
- Use your real LLM wrapper instead of `mock_llm`.
- For production, use a real embedding function (for example `embed_openai()`).
- Keep `embedding_dim` consistent between insert and retrieval.