Vertex AI Integration
HatiData integrates with Vertex AI Reasoning Engines to give Google Cloud-hosted agents access to a SQL data warehouse and persistent memory. The HatiDataAgent class provides tool functions that Vertex AI agents can call during reasoning, with full support for HatiData's vector search and memory operations.
Installation
pip install hatidata-agent
pip install google-cloud-aiplatform
Configure
from hatidata_agent import HatiDataAgent
hati = HatiDataAgent(
    host="your-org.proxy.hatidata.com",
    port=5439,
    agent_id="vertex-agent",
    framework="vertex-ai",
    password="hd_live_your_api_key",
)
| Parameter | Default | Description |
|---|---|---|
| host | "localhost" | HatiData proxy hostname |
| port | 5439 | Proxy port |
| agent_id | "agent" | Unique identifier for billing and audit |
| framework | "custom" | Set to "vertex-ai" for proper tracking |
| database | "hatidata" | Target database |
| password | "" | API key (hd_live_* or hd_test_*) |
When running on GCP, you can use Workload Identity Federation instead of API keys. See Authentication for federated auth configuration.
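A minimal sketch of the federated pattern is below. It obtains the runtime's default GCP credentials with google-auth; whether HatiDataAgent accepts the resulting short-lived OAuth token through the password parameter is an assumption for illustration only, so treat the Authentication guide as authoritative.
import google.auth
from google.auth.transport.requests import Request
from hatidata_agent import HatiDataAgent

# Use the service account attached to the GCP runtime (no static key required).
credentials, _ = google.auth.default()
credentials.refresh(Request())  # mint a short-lived access token

hati = HatiDataAgent(
    host="your-org.proxy.hatidata.com",
    port=5439,
    agent_id="vertex-agent",
    framework="vertex-ai",
    password=credentials.token,  # assumed: token passed in place of an hd_live_* key
)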
Basic Usage
Define HatiData tools for the Vertex AI agent:
from hatidata_agent import HatiDataAgent
import vertexai
from vertexai.preview import reasoning_engines
vertexai.init(project="your-gcp-project", location="us-central1")
hati = HatiDataAgent(
    host="your-org.proxy.hatidata.com",
    port=5439,
    agent_id="vertex-analyst",
    framework="vertex-ai",
    password="hd_live_your_api_key",
)
# Define tool functions
def query_warehouse(sql: str) -> str:
    """Execute a SQL query against the HatiData warehouse. Returns results as a string."""
    try:
        rows = hati.query(sql)
        return str(rows) if rows else "No results."
    except Exception as e:
        return f"Error: {e}"
def list_tables() -> str:
    """List all available tables in the data warehouse."""
    rows = hati.query(
        "SELECT table_name FROM information_schema.tables WHERE table_schema = 'main'"
    )
    return ", ".join(r["table_name"] for r in rows)
def describe_table(table_name: str) -> str:
    """Get column names and data types for a specific table."""
    rows = hati.query(
        f"SELECT column_name, data_type FROM information_schema.columns "
        f"WHERE table_name = '{table_name}'"
    )
    return "\n".join(f"{r['column_name']} {r['data_type']}" for r in rows)
def search_memory(query: str, top_k: int = 5) -> str:
    """Search agent memory for relevant past findings using semantic similarity."""
    results = hati.search_memory(query=query, top_k=top_k)
    if not results:
        return "No relevant memories found."
    return "\n".join(f"[{r['importance']:.1f}] {r['content']}" for r in results)
def store_memory(content: str, importance: float = 0.7) -> str:
    """Store an analytical finding in long-term agent memory."""
    result = hati.store_memory(
        content=content,
        memory_type="fact",
        importance=importance,
    )
    return f"Stored: {result['memory_id']}"
# Create a Reasoning Engine with HatiData tools
agent = reasoning_engines.LangchainAgent(
    model="gemini-2.0-flash",
    tools=[
        query_warehouse,
        list_tables,
        describe_table,
        search_memory,
        store_memory,
    ],
    agent_executor_kwargs={"verbose": True},
)
# Query locally
response = agent.query(
    input="What were our top 10 customers by revenue this quarter?"
)
print(response["output"])
Deploy to Vertex AI
Once the agent works locally, deploy it as a remote Reasoning Engine:
# Deploy
remote_agent = reasoning_engines.ReasoningEngine.create(
    agent,
    requirements=[
        "hatidata-agent",
        "google-cloud-aiplatform[langchain,reasoningengine]",
    ],
    display_name="HatiData Analyst Agent",
    description="Data analyst agent with HatiData warehouse access",
)
# Query the deployed agent
response = remote_agent.query(
    input="Summarize revenue trends by product category for the last 6 months"
)
print(response["output"])
For deployed Reasoning Engines, store the HatiData API key in Google Secret Manager and load it at runtime. Do not embed API keys in the deployment package.
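A minimal sketch using the google-cloud-secret-manager client is shown below; the secret name hatidata-api-key and the GOOGLE_CLOUD_PROJECT environment variable are assumptions for this example.
import os

from google.cloud import secretmanager
from hatidata_agent import HatiDataAgent

def load_hatidata_key(project_id: str, secret_id: str = "hatidata-api-key") -> str:
    """Read the HatiData API key from Secret Manager at startup."""
    client = secretmanager.SecretManagerServiceClient()
    name = f"projects/{project_id}/secrets/{secret_id}/versions/latest"
    response = client.access_secret_version(request={"name": name})
    return response.payload.data.decode("utf-8")

hati = HatiDataAgent(
    host="your-org.proxy.hatidata.com",
    port=5439,
    agent_id="vertex-analyst",
    framework="vertex-ai",
    password=load_hatidata_key(os.environ["GOOGLE_CLOUD_PROJECT"]),
)
If you adopt this pattern, add google-cloud-secret-manager to the deployment requirements and grant the agent's service account the Secret Manager Secret Accessor role.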
HatiMemoryTool Pattern
Wrap memory operations into a single tool class for cleaner agent definitions:
class HatiMemoryTool:
    """Provides memory operations for Vertex AI agents."""

    def __init__(self, hati: HatiDataAgent):
        self.hati = hati

    def search(self, query: str, top_k: int = 5) -> str:
        """Search memories by semantic similarity."""
        results = self.hati.search_memory(query=query, top_k=top_k)
        if not results:
            return "No relevant memories found."
        return "\n".join(f"[{r['importance']:.1f}] {r['content']}" for r in results)

    def store(self, content: str, importance: float = 0.7) -> str:
        """Store a new memory entry."""
        result = self.hati.store_memory(
            content=content,
            memory_type="fact",
            importance=importance,
        )
        return f"Stored: {result['memory_id']}"

    def get_state(self, key: str) -> str:
        """Retrieve a named agent state value."""
        state = self.hati.get_state(key)
        return state.get("value", "Not found") if state else "Not found"

    def set_state(self, key: str, value: str) -> str:
        """Set a named agent state value."""
        self.hati.set_state(key, value)
        return f"State '{key}' updated."
memory_tool = HatiMemoryTool(hati)
agent = reasoning_engines.LangchainAgent(
    model="gemini-2.0-flash",
    tools=[
        query_warehouse,
        list_tables,
        describe_table,
        memory_tool.search,
        memory_tool.store,
        memory_tool.get_state,
        memory_tool.set_state,
    ],
)
Hybrid SQL + Vector Search
Vertex AI agents can use HatiData's vector SQL extensions for semantic queries against the warehouse:
def semantic_query(search_text: str) -> str:
    """Find rows semantically similar to the search text in agent memory."""
    rows = hati.query(f"""
        SELECT content, importance,
               semantic_rank(content, '{search_text}') AS relevance
        FROM _hatidata_agent_memory
        WHERE semantic_match(content, '{search_text}', 0.7)
        ORDER BY relevance DESC
        LIMIT 10
    """)
    return str(rows) if rows else "No results."
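Register it alongside the other tools when constructing the agent:
agent = reasoning_engines.LangchainAgent(
    model="gemini-2.0-flash",
    tools=[
        query_warehouse,
        semantic_query,
        search_memory,
        store_memory,
    ],
)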
See Hybrid Search (Vector SQL) for the full vector SQL reference.
Next Steps
- Agent Integrations -- All supported frameworks
- Agent Memory -- How persistent memory works
- Security: Authentication -- Federated auth with GCP Workload Identity
- Python SDK -- Full SDK reference