LlamaIndex Integration
HatiData integrates with LlamaIndex through FunctionTool wrappers and the ReActAgent. Agents can query the warehouse, explore schemas, search memories by semantic similarity, and use semantic_rank() to score results -- all within a LlamaIndex agent loop.
Installation
pip install hatidata-agent
pip install llama-index llama-index-llms-openai
The hatidata-agent package provides the HatiDataAgent class, which connects to the HatiData proxy.
Configure
Create a HatiDataAgent instance:
from hatidata_agent import HatiDataAgent

hati = HatiDataAgent(
    host="your-org.proxy.hatidata.com",
    port=5439,
    agent_id="llamaindex-agent",
    framework="llamaindex",
    password="hd_live_your_api_key",
)
| Parameter | Default | Description |
|---|---|---|
| `host` | `"localhost"` | HatiData proxy hostname |
| `port` | `5439` | Proxy port |
| `agent_id` | `"agent"` | Unique identifier for billing and audit |
| `framework` | `"custom"` | Set to `"llamaindex"` for proper tracking |
| `database` | `"hatidata"` | Target database |
| `password` | `""` | API key (`hd_live_*` or `hd_test_*`) |
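In practice, avoid hard-coding the API key; loading it from the environment keeps credentials out of source control. A minimal sketch, assuming you export the key as HATIDATA_API_KEY (the variable name is our choice, not a HatiData convention):

import os

from hatidata_agent import HatiDataAgent

hati = HatiDataAgent(
    host="your-org.proxy.hatidata.com",
    port=5439,
    agent_id="llamaindex-agent",
    framework="llamaindex",
    # HATIDATA_API_KEY is an assumed variable name; any env var works.
    password=os.environ["HATIDATA_API_KEY"],
)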
Basic Usage
Wrap HatiData operations as FunctionTool instances and attach them to a ReActAgent:
from hatidata_agent import HatiDataAgent
from llama_index.core.tools import FunctionTool
from llama_index.core.agent import ReActAgent
from llama_index.llms.openai import OpenAI

hati = HatiDataAgent(
    host="your-org.proxy.hatidata.com",
    port=5439,
    agent_id="llamaindex-analyst",
    framework="llamaindex",
    password="hd_live_your_api_key",
)
# Define tool functions
def query_warehouse(sql: str) -> str:
    """Execute a SQL query against the HatiData warehouse. Returns results as a string."""
    try:
        rows = hati.query(sql)
        return str(rows) if rows else "No results."
    except Exception as e:
        return f"Error: {e}"
def list_tables() -> str:
    """List all available tables in the data warehouse."""
    rows = hati.query(
        "SELECT table_name FROM information_schema.tables WHERE table_schema = 'main'"
    )
    return ", ".join(r["table_name"] for r in rows)
def describe_table(table_name: str) -> str:
    """Get column names and data types for a specific table."""
    # table_name is interpolated directly into the SQL string; validate it
    # (e.g., against list_tables() output) before using this in production.
    rows = hati.query(
        f"SELECT column_name, data_type FROM information_schema.columns "
        f"WHERE table_name = '{table_name}'"
    )
    return "\n".join(f"{r['column_name']} {r['data_type']}" for r in rows)
def search_memory(query: str, top_k: int = 5) -> str:
    """Search agent memory by semantic similarity."""
    results = hati.search_memory(query=query, top_k=top_k)
    if not results:
        return "No relevant memories found."
    return "\n".join(
        f"[{r['importance']:.1f}] {r['content']}" for r in results
    )
# Wrap as LlamaIndex FunctionTools; each function's docstring becomes
# the tool description the LLM sees.
tools = [
    FunctionTool.from_defaults(fn=query_warehouse, name="query_warehouse"),
    FunctionTool.from_defaults(fn=list_tables, name="list_tables"),
    FunctionTool.from_defaults(fn=describe_table, name="describe_table"),
    FunctionTool.from_defaults(fn=search_memory, name="search_memory"),
]
# Create a ReActAgent
llm = OpenAI(model="gpt-4o", temperature=0)
agent = ReActAgent.from_tools(
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=10,
)

# Ask a question
response = agent.chat("What were our top 5 products by revenue last quarter?")
print(response)
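If your application is async, the same agent can be driven without blocking. A minimal sketch, assuming your LlamaIndex version exposes the async achat counterpart on ReActAgent (recent llama-index-core releases do):

import asyncio

async def main() -> None:
    # achat is the async counterpart of chat on LlamaIndex agents.
    response = await agent.achat(
        "What were our top 5 products by revenue last quarter?"
    )
    print(response)

asyncio.run(main())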
Using semantic_rank() for RAG Retrieval
LlamaIndex agents can leverage HatiData's vector SQL extensions to find semantically relevant data without a separate vector store:
def semantic_search(search_text: str, table: str = "_hatidata_agent_memory") -> str:
    """Find rows semantically similar to the search text, ranked by relevance."""
    # Escape single quotes so user text can't break out of the SQL literal.
    safe_text = search_text.replace("'", "''")
    rows = hati.query(f"""
        SELECT content, importance,
               semantic_rank(content, '{safe_text}') AS relevance
        FROM {table}
        WHERE semantic_match(content, '{safe_text}', 0.7)
        ORDER BY relevance DESC
        LIMIT 10
    """)
    return str(rows) if rows else "No semantically similar results."
semantic_tool = FunctionTool.from_defaults(
    fn=semantic_search,
    name="semantic_search",
    description="Search for semantically similar content using vector similarity",
)

# Add to agent
agent = ReActAgent.from_tools(
    tools=[*tools, semantic_tool],
    llm=llm,
    verbose=True,
)
response = agent.chat("Find any past analyses related to customer churn")
See Hybrid Search (Vector SQL) for the full vector SQL reference.
Advanced: Query Pipeline Integration
For more structured workflows, you can compose the tool functions into an explicit multi-step helper (or wire them into LlamaIndex's QueryPipeline). The hand-rolled version below gathers schema and memory context before handing the question to the agent:

# Define a pipeline that discovers schema, then queries
def analyze_with_context(question: str) -> str:
    """Multi-step analysis: discover schema, then query."""
    # Step 1: Get available tables
    tables = list_tables()
    # Step 2: Recall relevant past findings
    memories = search_memory(question)
    # Step 3: Let the agent reason with full context
    context = f"Available tables: {tables}\n\nRelevant past findings:\n{memories}"
    response = agent.chat(
        f"Context:\n{context}\n\nQuestion: {question}"
    )
    return str(response)
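The helper can also be called directly in a script; a minimal usage sketch (the question text is illustrative):

# Run a scripted, multi-step analysis
answer = analyze_with_context("Which products drove revenue growth last quarter?")
print(answer)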
Storing Results as Memory
Agents can persist their findings for future sessions:
def store_finding(content: str, importance: float = 0.7) -> str:
    """Store an analytical finding in long-term agent memory."""
    result = hati.store_memory(
        content=content,
        memory_type="fact",
        importance=importance,
    )
    return f"Stored: {result['memory_id']}"

store_tool = FunctionTool.from_defaults(fn=store_finding, name="store_finding")

# Agent can now store insights for later retrieval
agent = ReActAgent.from_tools(
    tools=[*tools, semantic_tool, store_tool],
    llm=llm,
    verbose=True,
)
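A quick round trip shows the memory persisting across calls, using only the functions defined above (the finding text is illustrative):

# Persist a finding, then recall it by semantic similarity.
store_finding("Trial-tier churn rose sharply in Q3", importance=0.9)
print(search_memory("customer churn"))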
Next Steps
- Agent Integrations -- All supported frameworks
- Hybrid Search (Vector SQL) -- Full vector SQL reference
- Agent Memory -- How persistent memory works
- Python SDK -- Full SDK reference