Hybrid SQL+Vector RAG
In this tutorial you will build a Retrieval-Augmented Generation (RAG) pipeline that combines SQL metadata filtering with vector similarity search. Unlike pure vector databases that only support approximate nearest neighbor queries, HatiData lets you filter, join, and aggregate alongside semantic search in a single SQL statement.
By the end you will have:
- A knowledge base with structured metadata and semantic search
- Hybrid queries that combine `WHERE` filters with `semantic_match()`
- Relevance-ranked retrieval using `semantic_rank()`
- A complete RAG pipeline that feeds retrieved context to an LLM
Prerequisites
- Python 3.10+
- HatiData proxy running with an embedding provider configured
- `hatidata` and `openai` SDKs installed
pip install hatidata openai
export HATIDATA_API_KEY="hd_live_your_api_key"
export HATIDATA_HOST="localhost"
export OPENAI_API_KEY="sk-..."
Step 1: Create the Knowledge Base
Build a knowledge base with structured metadata columns that support SQL filtering:
import os
from hatidata import HatiDataClient

# Connect to the HatiData proxy. Host and API key come from the environment
# variables set in Prerequisites; 5439 is the proxy's Postgres-compatible port.
client = HatiDataClient(
    host=os.environ["HATIDATA_HOST"],
    port=5439,
    api_key=os.environ["HATIDATA_API_KEY"],
)
client.execute("""
CREATE TABLE IF NOT EXISTS knowledge_base (
doc_id TEXT PRIMARY KEY,
title TEXT NOT NULL,
content TEXT NOT NULL,
category TEXT NOT NULL,
audience TEXT DEFAULT 'general',
version INTEGER DEFAULT 1,
updated_at TIMESTAMPTZ DEFAULT now()
)
""")
# Seed documents: each tuple is (doc_id, title, content, category, audience, version).
docs = [
    ("doc-001", "Getting Started with HatiData",
     "HatiData is an agent-native data infrastructure. Install the SDK with pip install hatidata. "
     "Connect to the proxy on port 5439 using any Postgres client.",
     "getting-started", "developer", 3),
    ("doc-002", "Memory Architecture",
     "HatiData stores agent memories in structured tables with automatic embedding indexing. "
     "Hybrid search combines vector ANN pre-filter with exact SQL joins.",
     "architecture", "developer", 2),
    ("doc-003", "Pricing Plans",
     "Free tier: 1M queries/month. Growth: 10M queries, priority support. "
     "Enterprise: unlimited queries, PrivateLink, dedicated support engineer.",
     "pricing", "business", 1),
    ("doc-004", "Security and Compliance",
     "HatiData supports SOC 2 Type II, CMEK encryption, row-level security, "
     "and immutable audit trails with cryptographic hash chains.",
     "security", "enterprise", 2),
    ("doc-005", "Branch Isolation for Safe Exploration",
     "Create copy-on-write branches to let agents explore data without affecting production. "
     "Branches use schema isolation with zero-copy views on creation.",
     "features", "developer", 1),
    ("doc-006", "Enterprise SSO Configuration",
     "Configure SAML or OIDC single sign-on through Clerk. "
     "Supports organization-level authentication and SCIM directory sync.",
     "security", "enterprise", 1),
]

# Bulk insert with positional placeholders; updated_at takes its default.
# NOTE(review): re-running this script will collide with the doc_id primary
# key — clear the table or add ON CONFLICT handling before a second run.
client.executemany(
    "INSERT INTO knowledge_base (doc_id, title, content, category, audience, version) "
    "VALUES (?, ?, ?, ?, ?, ?)",
    docs,
)
print(f"Seeded {len(docs)} documents.")
Step 2: Pure Vector Search
Use semantic_match() for basic vector similarity filtering:
def vector_search(query: str, threshold: float = 0.7, limit: int = 5) -> list[dict]:
    """Return up to *limit* documents semantically similar to *query*.

    Rows pass only when semantic_match scores at or above *threshold*;
    results come back most-relevant first.
    """
    # Double up single quotes so the text is safe inside a SQL string literal.
    safe_query = query.replace("'", "''")
    sql = f"""
        SELECT
            doc_id,
            title,
            content,
            category,
            semantic_rank(content, '{safe_query}') AS relevance
        FROM knowledge_base
        WHERE semantic_match(content, '{safe_query}', {threshold})
        ORDER BY relevance DESC
        LIMIT {limit}
    """
    return list(map(dict, client.query(sql)))
# Smoke test: pure vector search with the default 0.7 threshold.
results = vector_search("How do I connect to the database?")
for r in results:
    print(f"[{r['relevance']:.3f}] {r['title']}")
Step 3: Hybrid Search with SQL Filters
Combine semantic search with structured metadata filters for precision retrieval:
def hybrid_search(
    query: str,
    category: str | None = None,
    audience: str | None = None,
    min_version: int | None = None,
    threshold: float = 0.65,
    limit: int = 5,
) -> list[dict]:
    """Search with both semantic similarity and SQL metadata filters.

    Args:
        query: Natural-language search text.
        category: Optional exact-match filter on the category column.
        audience: Optional exact-match filter on the audience column.
        min_version: Optional minimum document version (0 is honored).
        threshold: Minimum semantic_match score for a row to qualify.
        limit: Maximum number of rows returned.

    Returns:
        Matching rows as dicts, ordered by descending semantic relevance.
    """
    def _esc(value: str) -> str:
        # Double single quotes so user-supplied text is safe in a SQL literal.
        return value.replace("'", "''")

    query_escaped = _esc(query)
    filters = []
    # Escape the metadata values too, not just the query text — they may also
    # come from user input, and the original left them injectable.
    if category is not None:
        filters.append(f"category = '{_esc(category)}'")
    if audience is not None:
        filters.append(f"audience = '{_esc(audience)}'")
    # `is not None` rather than truthiness: min_version=0 is a valid filter
    # that the original `if min_version:` silently dropped.
    if min_version is not None:
        filters.append(f"version >= {int(min_version)}")
    where_clause = " AND ".join(
        [f"semantic_match(content, '{query_escaped}', {threshold})"] + filters
    )
    rows = client.query(f"""
        SELECT
            doc_id,
            title,
            content,
            category,
            audience,
            version,
            semantic_rank(content, '{query_escaped}') AS relevance
        FROM knowledge_base
        WHERE {where_clause}
        ORDER BY relevance DESC
        LIMIT {limit}
    """)
    return [dict(r) for r in rows]
# Search for security docs aimed at enterprise audience: the semantic query
# narrows by meaning while the category/audience filters narrow by metadata.
results = hybrid_search(
    query="authentication and single sign-on",
    category="security",
    audience="enterprise",
)
for r in results:
    print(f"[{r['relevance']:.3f}] {r['title']} (v{r['version']}, {r['audience']})")
Step 4: Cross-Table Retrieval
Join the knowledge base with other tables for context-enriched retrieval:
# Create a related table
client.execute("""
CREATE TABLE IF NOT EXISTS doc_feedback (
doc_id TEXT REFERENCES knowledge_base(doc_id),
helpful_count INTEGER DEFAULT 0,
view_count INTEGER DEFAULT 0
)
""")
client.executemany(
"INSERT INTO doc_feedback VALUES (?, ?, ?)",
[("doc-001", 42, 300), ("doc-002", 28, 180), ("doc-003", 15, 450),
("doc-004", 35, 220), ("doc-005", 20, 95), ("doc-006", 8, 60)],
)
def search_with_popularity(query: str, limit: int = 5) -> list[dict]:
    """Rank matches by a blend of semantic relevance and reader feedback.

    The blended score weights semantic_rank at 70% and the helpful/view
    ratio at 30%; docs with zero views contribute NULL popularity.
    """
    # Make the query text safe to embed inside a SQL string literal.
    escaped = query.replace("'", "''")
    statement = f"""
        SELECT
            kb.doc_id,
            kb.title,
            kb.content,
            kb.category,
            semantic_rank(kb.content, '{escaped}') AS relevance,
            f.helpful_count,
            f.view_count,
            -- Blended score: 70% semantic + 30% popularity
            (0.7 * semantic_rank(kb.content, '{escaped}')
             + 0.3 * (CAST(f.helpful_count AS FLOAT) / NULLIF(f.view_count, 0)))
            AS blended_score
        FROM knowledge_base kb
        LEFT JOIN doc_feedback f ON kb.doc_id = f.doc_id
        WHERE semantic_match(kb.content, '{escaped}', 0.6)
        ORDER BY blended_score DESC
        LIMIT {limit}
    """
    return [dict(row) for row in client.query(statement)]
# Popularity-aware search: blended_score can reorder semantically similar docs.
results = search_with_popularity("How do I get started?")
for r in results:
    print(f"[blended: {r['blended_score']:.3f}] {r['title']} "
          f"({r['helpful_count']} helpful / {r['view_count']} views)")
Step 5: Build the RAG Pipeline
Combine retrieval with LLM generation:
import openai

# The OpenAI SDK reads OPENAI_API_KEY from the environment (see Prerequisites).
oai = openai.OpenAI()
def rag_query(
    question: str,
    category: str | None = None,
    audience: str | None = None,
) -> str:
    """Answer *question* via hybrid retrieval followed by LLM generation.

    Optional category/audience filters scope retrieval; returns the model's
    answer, or a fixed fallback string when nothing relevant is retrieved.
    """
    # Retrieve up to five relevant documents with hybrid SQL + vector search.
    hits = hybrid_search(
        query=question,
        category=category,
        audience=audience,
        threshold=0.6,
        limit=5,
    )
    if not hits:
        return "I could not find relevant information to answer your question."

    # Stitch the retrieved documents into one markdown context block.
    sections = []
    for hit in hits:
        sections.append(
            f"### {hit['title']} (relevance: {hit['relevance']:.2f})\n{hit['content']}"
        )
    context = "\n\n".join(sections)

    system_prompt = (
        "Answer the user's question based on the provided context. "
        "If the context does not contain enough information, say so. "
        "Cite the document titles you used."
    )
    completion = oai.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}"},
        ],
        temperature=0,  # deterministic output for retrieval-grounded answers
    )
    return completion.choices[0].message.content
# Test the RAG pipeline end-to-end: retrieval scoped to enterprise-audience
# docs, then LLM generation grounded in the retrieved context.
answer = rag_query(
    "How does HatiData handle security and authentication?",
    audience="enterprise",
)
print(answer)
Threshold Tuning Guide
| Threshold | Precision | Recall | Best For |
|---|---|---|---|
| 0.85+ | Very high | Low | Exact match, deduplication |
| 0.75 | High | Medium | Targeted retrieval, specific facts |
| 0.65 (recommended) | Medium | Medium-High | General RAG, hybrid search |
| 0.55 | Low-Medium | High | Exploratory search |
Start with 0.65 and adjust based on your recall/precision needs. The SQL metadata filters already narrow the candidate set, so a lower threshold is often acceptable in hybrid queries.
Related Concepts
- Hybrid SQL -- Full `semantic_match` and `semantic_rank` reference
- SQL Functions & Types -- Complete SQL function reference
- Memory Patterns -- RAG patterns for agent memory
- LlamaIndex Integration -- RAG with LlamaIndex
- Arrow & Polars Integration -- High-performance retrieval