AutoGen Integration
HatiData integrates with Microsoft AutoGen to give multi-agent GroupChat conversations governed access to a shared data layer. Agents in a GroupChat can query structured data, store and retrieve memories, and share state — all persisted in HatiData and attributed per-agent for billing and audit.
ANDI (Agent-Native Data Interface) is the abstraction layer that connects AutoGen function calls to HatiData's query pipeline. When an agent calls query(), ANDI runs the full multi-stage pipeline: policy check, cost estimation, quota enforcement, row-level security, SQL transpilation, query execution, AI healing on failure, column masking, and audit logging. The agent receives structured results without any awareness of the underlying data infrastructure.
Installation
pip install hatidata-agent
pip install pyautogen
The hatidata-agent package provides the HatiDataAgent class, which connects to the HatiData proxy over the PostgreSQL wire protocol.
Connection Setup
from hatidata_agent import HatiDataAgent

# Connect to the HatiData proxy over the PostgreSQL wire protocol.
# The API key is supplied as the connection password.
hati = HatiDataAgent(
    host="your-org.proxy.hatidata.com",  # your org's proxy hostname
    port=5439,                           # default proxy port
    agent_id="autogen-analyst",          # unique id for billing and audit attribution
    framework="autogen",                 # set to "autogen" for correct tracking
    password="hd_live_your_api_key",     # hd_live_* (production) or hd_test_* key
)
| Parameter | Default | Description |
|---|---|---|
| host | "localhost" | HatiData proxy hostname |
| port | 5439 | Proxy port |
| agent_id | "agent" | Unique identifier for billing and audit |
| framework | "custom" | Set to "autogen" for correct tracking |
| database | "hatidata" | Target database |
| password | "" | API key (hd_live_* or hd_test_*) |
Basic Usage
Register HatiData query functions with AutoGen agents so they can call them during conversation:
import autogen
from hatidata_agent import HatiDataAgent

# Shared connection used by all tool functions below; queries are
# attributed to the "autogen-analyst" agent for billing and audit.
hati = HatiDataAgent(
    host="your-org.proxy.hatidata.com",
    port=5439,
    agent_id="autogen-analyst",
    framework="autogen",
    password="hd_live_your_api_key",
)
def query_data_layer(sql: str) -> str:
    """Run a SQL statement against the data layer and report the outcome as text."""
    try:
        result = hati.query(sql)
    except Exception as exc:
        # Surface failures to the calling agent as plain text instead of raising.
        return f"Error: {exc}"
    if not result:
        return "No results."
    return str(result)
def list_tables() -> str:
    """List all available tables."""
    table_rows = hati.query(
        "SELECT table_name FROM information_schema.tables WHERE table_schema = 'main'"
    )
    names = [row["table_name"] for row in table_rows]
    return ", ".join(names)
def describe_table(table_name: str) -> str:
    """Get column names and types for a table.

    The table name arrives from an LLM tool call, so it is validated as a
    plain SQL identifier before being interpolated into the query. Without
    this check, a crafted table name could inject arbitrary SQL.
    """
    import re

    # Accept only unquoted identifiers: a letter or underscore followed by
    # letters, digits, or underscores.
    if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", table_name):
        return f"Error: invalid table name {table_name!r}"
    rows = hati.query(
        f"SELECT column_name, data_type FROM information_schema.columns "
        f"WHERE table_name = '{table_name}'"
    )
    return "\n".join(f"{r['column_name']} {r['data_type']}" for r in rows)
# The assistant drives analysis; its system prompt steers it toward
# discovering the schema before writing queries.
assistant = autogen.AssistantAgent(
    name="data_analyst",
    system_message="""You are a data analyst with access to a SQL data layer.
Use list_tables() to discover tables, describe_table() to understand schemas,
and query_data_layer() to run SQL queries. Always explore the schema before querying.""",
    llm_config={"config_list": [{"model": "gpt-4o"}]},
)

# The user proxy executes the registered functions when the assistant
# requests them; no human is in the loop.
user_proxy = autogen.UserProxyAgent(
    name="user",
    human_input_mode="NEVER",       # fully automated replies
    max_consecutive_auto_reply=10,  # cap the tool-call loop
    function_map={
        "query_data_layer": query_data_layer,
        "list_tables": list_tables,
        "describe_table": describe_table,
    },
)

# Start the two-agent conversation with an analysis question.
user_proxy.initiate_chat(
    assistant,
    message="What is the average order value by customer segment this quarter?",
)
GroupChat with Shared State
Multiple AutoGen agents sharing a single HatiData connection can collaborate on complex analysis tasks. Each agent gets its own agent_id for billing attribution while sharing the same data layer:
import autogen
from hatidata_agent import HatiDataAgent
def make_hati(agent_name: str) -> HatiDataAgent:
    """Build a proxy connection whose queries are attributed to *agent_name*."""
    attributed_id = f"autogen-{agent_name}"
    connection = HatiDataAgent(
        host="your-org.proxy.hatidata.com",
        port=5439,
        agent_id=attributed_id,
        framework="autogen",
        password="hd_live_your_api_key",
    )
    return connection
# One connection per role so each agent's queries are billed and audited
# under its own agent_id while sharing the same data layer.
researcher_hati = make_hati("researcher")
analyst_hati = make_hati("analyst")
def explore_schema() -> str:
    """Discover tables and their schemas.

    Fetches every table/column pair in a single round trip instead of
    issuing one columns query per table (the original N+1 pattern), then
    groups the rows by table client-side.

    Returns:
        One line per table: "table_name: col (type), col (type), ...".
    """
    from collections import defaultdict

    rows = researcher_hati.query(
        "SELECT t.table_name, c.column_name, c.data_type "
        "FROM information_schema.tables t "
        "JOIN information_schema.columns c ON c.table_name = t.table_name "
        "WHERE t.table_schema = 'main' "
        "ORDER BY t.table_name, c.ordinal_position"
    )
    columns_by_table = defaultdict(list)
    for row in rows:
        columns_by_table[row["table_name"]].append(
            f"{row['column_name']} ({row['data_type']})"
        )
    return "\n".join(
        f"{table}: {', '.join(cols)}" for table, cols in columns_by_table.items()
    )
def run_analysis(sql: str) -> str:
    """Execute an analytical SQL query, returning results or an error as text."""
    try:
        rows = analyst_hati.query(sql)
    except Exception as exc:
        return f"Error: {exc}"
    return str(rows)
# Schema-discovery specialist; only agent wired to explore_schema().
researcher = autogen.AssistantAgent(
    name="researcher",
    system_message="You explore database schemas. Use explore_schema() to discover available data.",
    llm_config={"config_list": [{"model": "gpt-4o"}]},
)

# Query-writing specialist; runs SQL via run_analysis().
analyst = autogen.AssistantAgent(
    name="analyst",
    system_message="You write SQL queries based on the researcher's schema findings. Use run_analysis().",
    llm_config={"config_list": [{"model": "gpt-4o"}]},
)

# The writer has no data-layer functions; it only summarizes prior messages.
writer = autogen.AssistantAgent(
    name="writer",
    system_message="You synthesize analysis results into clear business insights.",
    llm_config={"config_list": [{"model": "gpt-4o"}]},
)

# The proxy executes tool calls requested by the other agents.
user_proxy = autogen.UserProxyAgent(
    name="user",
    human_input_mode="NEVER",  # run unattended
    function_map={
        "explore_schema": explore_schema,
        "run_analysis": run_analysis,
    },
)

groupchat = autogen.GroupChat(
    agents=[user_proxy, researcher, analyst, writer],
    messages=[],   # start with an empty transcript
    max_round=15,  # hard cap on conversation rounds
)

# The manager coordinates the group chat, selecting which agent speaks
# each round using its own LLM config.
manager = autogen.GroupChatManager(
    groupchat=groupchat,
    llm_config={"config_list": [{"model": "gpt-4o"}]},
)

user_proxy.initiate_chat(
    manager,
    message="Analyze our customer retention rates and identify the top factors driving churn.",
)
Memory-Augmented Agents
AutoGen agents can store and retrieve memories across conversations using HatiData's long-term memory system:
from hatidata_agent import HatiDataAgent

# Memory-focused connection; port is omitted and falls back to the
# default 5439 (see the parameter table above).
hati = HatiDataAgent(
    host="your-org.proxy.hatidata.com",
    agent_id="autogen-memory-agent",
    framework="autogen",
    password="hd_live_your_api_key",
)
def store_finding(content: str, importance: float = 0.7) -> str:
    """Persist an analytical finding to long-term memory and confirm its id."""
    stored = hati.store_memory(
        content=content,
        memory_type="fact",
        importance=importance,
    )
    memory_id = stored["memory_id"]
    return f"Stored memory: {memory_id}"
def recall_findings(query: str, top_k: int = 5) -> str:
    """Search past findings by semantic similarity."""
    matches = hati.search_memory(query=query, top_k=top_k)
    if not matches:
        return "No relevant findings in memory."
    lines = [f"[importance={m['importance']}] {m['content']}" for m in matches]
    return "\n".join(lines)
Memories are persisted in HatiData's _hatidata_agent_memory table. Searches use hybrid retrieval: vector ANN for semantic similarity, followed by a metadata join by memory_id UUID. This gives sub-10ms p50 search latency across millions of stored memories.
Hybrid SQL + Vector Search
AutoGen agents can use HatiData's vector SQL extensions to run semantic queries directly in SQL:
def semantic_query(search_text: str) -> str:
    """Find relevant memories using semantic search.

    The search text comes from an LLM, so single quotes are doubled
    (standard SQL string-literal escaping) before interpolation. Without
    this, any apostrophe in the text would break the query — or allow
    SQL injection into the data layer.
    """
    safe_text = search_text.replace("'", "''")
    rows = hati.query(f"""
        SELECT content, importance,
               semantic_rank(content, '{safe_text}') AS relevance
        FROM _hatidata_agent_memory
        WHERE semantic_match(content, '{safe_text}', 0.7)
        ORDER BY relevance DESC
        LIMIT 10
    """)
    return str(rows)
See Hybrid SQL for the full vector SQL reference.
Cookbook
For end-to-end examples including GroupChat financial analysis, multi-agent research pipelines, and memory-augmented workflows, see the HatiData Cookbook.
Related Concepts
- Core Concepts: Persistent Memory — How the memory system works under the hood
- MCP Tools Reference — All 24 MCP tools
- Python SDK — Full SDK reference
- LangChain Integration — Memory, VectorStore, and Toolkit for LangChain
- CrewAI Integration — Multi-agent crews with per-role billing