Migrate from Weaviate
Weaviate is a vector database with schema-based collections and GraphQL queries. HatiData replaces Weaviate's vector-first storage with a unified SQL + vector layer where your data is queryable with standard SQL, joinable with business tables, and governed with enterprise-grade access control. This guide covers schema mapping, data export, and query rewriting.
Feature Comparison
| Capability | Weaviate | HatiData |
|---|---|---|
| Vector storage | Native | Native (built-in vector index) |
| Schema definition | Collection classes with properties | SQL CREATE TABLE |
| Query language | GraphQL (Get, Aggregate) | SQL with semantic_match(), semantic_rank() |
| Metadata filtering | GraphQL where filter | SQL WHERE clause (full predicate support) |
| Cross-collection joins | Not supported | SQL JOIN across any tables |
| Hybrid search | BM25 + vector (built-in) | Vector ANN + SQL filter |
| Authentication | API key, OIDC | API key, JWT, OIDC, SAML, federated |
| Multi-tenancy | Class-level isolation | Organization + agent-level isolation |
| Audit trail | Not built-in | Immutable, cryptographically hash-chained |
| Wire protocol | REST + GraphQL | Postgres wire protocol (port 5439) |
| Agent features | Not built-in | Memory, CoT ledger, triggers, branches |
Schema Mapping
Weaviate Class to HatiData Table
# Weaviate schema
{
"class": "Article",
"description": "Knowledge base articles",
"properties": [
{"name": "title", "dataType": ["text"]},
{"name": "content", "dataType": ["text"]},
{"name": "category", "dataType": ["text"]},
{"name": "author", "dataType": ["text"]},
{"name": "publishedAt", "dataType": ["date"]},
{"name": "wordCount", "dataType": ["int"]},
],
"vectorizer": "text2vec-openai",
}
-- HatiData equivalent
CREATE TABLE articles (
    article_id TEXT PRIMARY KEY,   -- Weaviate object UUID
    title TEXT NOT NULL,
    content TEXT NOT NULL,         -- embedded automatically when queried via semantic_match()
    category TEXT,
    author TEXT,
    published_at TIMESTAMPTZ,      -- Weaviate "date" property (publishedAt)
    word_count INTEGER             -- Weaviate "int" property (wordCount)
);
HatiData does not require a vectorizer declaration in the schema. Embeddings are generated automatically when you use semantic_match() or store content via the memory API.
Type Mapping
| Weaviate Type | HatiData Type |
|---|---|
text | TEXT |
text[] | TEXT[] or JSON |
int | INTEGER |
number | DOUBLE |
boolean | BOOLEAN |
date | TIMESTAMPTZ |
uuid | TEXT (UUID format) |
blob | BLOB |
object | JSON |
geoCoordinates | Two DOUBLE columns (lat, lon) |
Data Migration
Step 1: Export from Weaviate
import weaviate
import json
client = weaviate.Client("http://localhost:8080")


# Export all objects from a class
def export_class(class_name: str, output_file: str,
                 properties: list[str] | None = None,
                 batch_size: int = 100) -> int:
    """Export every object of *class_name* to a JSONL file.

    Pages through the class with Weaviate's cursor API (``with_after``)
    and writes one JSON record per line containing the object id, stored
    vector, requested properties, and creation timestamp, so the file can
    be replayed into HatiData.

    Args:
        class_name: Weaviate class to export (e.g. "Article").
        output_file: Path of the JSONL file to write.
        properties: Property names to fetch; defaults to the Article
            schema shown above, preserving the original behavior.
        batch_size: Number of objects fetched per request.

    Returns:
        The number of objects exported.

    Raises:
        RuntimeError: If the GraphQL response reports errors.
    """
    if properties is None:
        properties = ["title", "content", "category",
                      "author", "publishedAt", "wordCount"]
    cursor = None
    total = 0
    with open(output_file, "w") as f:
        while True:
            query = (
                client.query
                .get(class_name, properties)
                .with_additional(["id", "vector", "creationTimeUnix"])
                .with_limit(batch_size)
            )
            if cursor:
                query = query.with_after(cursor)
            result = query.do()
            # A failed GraphQL call returns an "errors" key and may omit
            # "data" entirely; surface that instead of a bare KeyError.
            if result.get("errors"):
                raise RuntimeError(f"Weaviate query failed: {result['errors']}")
            objects = result["data"]["Get"][class_name]
            if not objects:
                break
            for obj in objects:
                record = {
                    "id": obj["_additional"]["id"],
                    "vector": obj["_additional"]["vector"],
                    "properties": {k: v for k, v in obj.items() if k != "_additional"},
                    "created_at": obj["_additional"]["creationTimeUnix"],
                }
                f.write(json.dumps(record) + "\n")
                total += 1
            # Cursor for the next page: id of the last object seen.
            cursor = objects[-1]["_additional"]["id"]
    print(f"Exported {total} objects from {class_name}")
    return total


export_class("Article", "weaviate-articles.jsonl")
Step 2: Import into HatiData
from hatidata import HatiDataClient
import json
# Connect over the Postgres wire protocol (port 5439).
hd = HatiDataClient(
    host="localhost",
    port=5439,
    api_key="hd_live_your_api_key",
)

# Create the table
hd.execute("""
CREATE TABLE IF NOT EXISTS articles (
    article_id TEXT PRIMARY KEY,
    title TEXT NOT NULL,
    content TEXT NOT NULL,
    category TEXT,
    author TEXT,
    published_at TIMESTAMPTZ,
    word_count INTEGER
)
""")

_INSERT_SQL = "INSERT INTO articles VALUES (?, ?, ?, ?, ?, ?, ?)"


def _row_from_record(record):
    """Map one exported Weaviate record to an articles-table tuple."""
    props = record["properties"]
    return (
        record["id"],
        props.get("title", ""),
        props.get("content", ""),
        props.get("category"),
        props.get("author"),
        props.get("publishedAt"),
        props.get("wordCount"),
    )


# Import data, flushing in batches of 1000 rows.
with open("weaviate-articles.jsonl") as f:
    pending = []
    for line in f:
        pending.append(_row_from_record(json.loads(line)))
        if len(pending) >= 1000:
            hd.executemany(_INSERT_SQL, pending)
            pending = []
    if pending:
        hd.executemany(_INSERT_SQL, pending)

print("Import complete.")
Step 3: Store as Memories (Optional)
If you want the content indexed for semantic search via the memory API:
from hatidata.memory import MemoryClient
memory = MemoryClient(hd)

# Replay each exported article into the agent's semantic memory so its
# content is indexed for semantic search.
with open("weaviate-articles.jsonl") as src:
    for raw in src:
        rec = json.loads(raw)
        fields = rec["properties"]
        meta = {
            "title": fields.get("title"),
            "category": fields.get("category"),
            "source": "weaviate-migration",
            "original_id": rec["id"],
        }
        memory.store(
            agent_id="knowledge-agent",
            content=fields.get("content", ""),
            metadata=meta,
        )
Step 4: Verify
-- Count imported records
SELECT COUNT(*) AS total FROM articles;
-- Verify semantic search works
SELECT title, semantic_rank(content, 'machine learning') AS relevance
FROM articles
WHERE semantic_match(content, 'machine learning', 0.7)
ORDER BY relevance DESC
LIMIT 5;
Query Migration
Semantic Search
# Weaviate GraphQL
{
Get {
Article(
nearText: {
concepts: ["machine learning applications"],
certainty: 0.7
}
limit: 5
) {
title
content
category
_additional { certainty }
}
}
}
-- HatiData SQL
SELECT
title,
content,
category,
semantic_rank(content, 'machine learning applications') AS relevance
FROM articles
WHERE semantic_match(content, 'machine learning applications', 0.7)
ORDER BY relevance DESC
LIMIT 5;
Filtered Search
# Weaviate GraphQL
{
Get {
Article(
nearText: { concepts: ["data security"], certainty: 0.7 }
where: {
operator: And,
operands: [
{ path: ["category"], operator: Equal, valueText: "security" },
{ path: ["wordCount"], operator: GreaterThan, valueInt: 500 }
]
}
limit: 10
) {
title content category wordCount
}
}
}
-- HatiData SQL
SELECT title, content, category, word_count,
semantic_rank(content, 'data security') AS relevance
FROM articles
WHERE semantic_match(content, 'data security', 0.7)
AND category = 'security'
AND word_count > 500
ORDER BY relevance DESC
LIMIT 10;
Aggregation
# Weaviate GraphQL
{
Aggregate {
Article(groupBy: ["category"]) {
groupedBy { value }
meta { count }
}
}
}
-- HatiData SQL
SELECT category, COUNT(*) AS article_count
FROM articles
GROUP BY category
ORDER BY article_count DESC;
What You Gain
- Standard SQL -- No GraphQL schema management; use familiar SQL
- Cross-table joins -- Join articles with users, events, business data
- Hybrid search -- Vector + SQL in one query, no separate BM25 index
- Agent features -- Memory, CoT ledger, triggers, branches out of the box
- Governance -- Row-level security, audit trails, per-agent billing
- Wire protocol -- Connect with psycopg2, asyncpg, SQLAlchemy, any BI tool
Related Concepts
- Persistent Memory -- Memory architecture
- Hybrid SQL -- Semantic search functions
- SQL Functions & Types -- Full SQL reference
- Migrate from Pinecone -- Another vector DB migration
- Migrate from Mem0 -- Memory service migration