VIBE AI implements a Mem0-style self-improving memory system that enables agents to learn and remember across conversations.
┌─────────────────────────────────────────────────────────────────────┐
│ MEMORY HIERARCHY │
├─────────────────────────────────────────────────────────────────────┤
│ │
│ USER MEMORY (Long-term) │
│ ┌───────────────────────────────────────────────────────────────┐ │
│ │ • Preferences: "Prefers BTC over ETH" │ │
│ │ • Facts: "Works at Acme Corp" │ │
│ │ • History: "Invested in SOL on 2024-01-15" │ │
│ │ • Shared across ALL agents │ │
│ └───────────────────────────────────────────────────────────────┘ │
│ │ │
│ AGENT MEMORY (Per-agent) ▼ │
│ ┌───────────────────────────────────────────────────────────────┐ │
│ │ • Agent-specific knowledge │ │
│ │ • Learned patterns │ │
│ │ • Custom instructions │ │
│ └───────────────────────────────────────────────────────────────┘ │
│ │ │
│ SESSION MEMORY (Short-term) ▼ │
│ ┌───────────────────────────────────────────────────────────────┐ │
│ │ • Current conversation context │ │
│ │ • Recent messages │ │
│ │ • Temporary state │ │
│ └───────────────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────┘
Conversation Auto-Extract Memory Store
┌───────────────┐ ┌───────────────┐ ┌───────────────┐
│ │ LLM │ │ │ │
│ User: "I │────────▶│ Extracted: │────────▶│ pgvector │
│ prefer BTC │ │ • preference: │ │ ┌─────────┐ │
│ over ETH │ │ BTC > ETH │ │ │ Content │ │
│ for long │ │ • timeframe: │ │ │Embedding│ │
│ term holds" │ │ long_term │ │ │Metadata │ │
│ │ │ │ │ └─────────┘ │
└───────────────┘ └───────────────┘ └───────────────┘
# Prompt given to the extraction LLM. It asks for structured JSON
# (content/type/confidence/tags) so each extracted memory can be stored
# directly without further parsing. Do not reformat: the text below is
# sent to the model verbatim.
EXTRACTION_PROMPT = """
Analyze this conversation and extract facts to remember.
Return JSON with:
- content: The fact to remember
- type: preference | fact | event | instruction
- confidence: 0.0-1.0
- tags: relevant categories
Example:
{
"memories": [
{
"content": "User prefers BTC over ETH for long-term holds",
"type": "preference",
"confidence": 0.95,
"tags": ["crypto", "trading", "preference"]
}
]
}
"""
# Explicitly store a memory (bypasses auto-extraction).
await memory.add(
    content="User prefers detailed explanations",
    type="preference",
    user_id=user_id,
    metadata={"source": "explicit_request"}
)

# Semantic search over the user's memories.
results = await memory.search(
    query="crypto preferences",
    user_id=user_id,
    limit=5,
    threshold=0.7  # minimum similarity for a hit to be returned
)

# Returns relevant memories ranked by similarity
for result in results:
    print(f"{result.content} (score: {result.similarity})")

# Update existing memory (e.g. after a user correction)
await memory.update(
    memory_id=memory_id,
    content="User now prefers ETH over BTC",
    metadata={"updated_reason": "user_correction"}
)

# Delete specific memory
await memory.delete(memory_id=memory_id)

# Delete all user memories
await memory.delete_all(user_id=user_id)
async def build_context(self, query: str) -> dict:
    """Assemble the full LLM context for *query*.

    Retrieves up to 10 memories relevant to the query, renders them
    into a text block, and combines that with the agent's system
    prompt, the recent conversation, and the available tool schemas.
    """
    # Pull the memories most relevant to this query for this user.
    relevant = await self.memory.search(
        query=query,
        user_id=self.user_id,
        limit=10,
    )

    # Wire everything together; the formatted memories are what get
    # injected into the prompt as user context.
    context = {
        "system_prompt": self.agent.system_prompt,
        "user_context": self.format_memories(relevant),
        "conversation": self.get_recent_messages(),
        "tools": self.get_tool_schemas(),
    }
    return context
## User Context
Based on previous interactions:
- User prefers BTC over ETH for long-term holds
- User works at Acme Corp
- User invested in SOL on 2024-01-15
- User prefers detailed explanations
Use this context to personalize your responses.
When a new memory conflicts with an existing one, the more recent memory wins — duplicates are resolved at insertion time:
async def add_with_dedup(self, new_memory: Memory) -> Memory:
    """Insert *new_memory*, collapsing near-duplicates.

    Searches for an existing memory with highly similar content
    (similarity >= 0.9). If one is found, the more recent memory wins:
    a newer incoming memory overwrites the stored one in place, while
    an older/equal one is discarded in favor of the stored memory.
    With no close match, the memory is simply added.
    """
    # Probe the store for near-duplicate content.
    matches = await self.search(
        query=new_memory.content,
        threshold=0.9,
    )

    # No close match — store it as a brand-new memory.
    if not matches:
        return await self.add(new_memory)

    best = matches[0]
    if new_memory.timestamp <= best.timestamp:
        # Stored memory is at least as fresh; keep it untouched.
        return best

    # Incoming memory supersedes the stored one — overwrite in place.
    return await self.update(
        memory_id=best.id,
        content=new_memory.content,
    )
Memories become less relevant over time, so retrieval ranking blends semantic similarity with time decay and access frequency:
def calculate_score(memory: Memory, query: str) -> float:
    """Rank *memory* against *query*, blending relevance with recency and usage.

    The final score is a weighted sum:
      60% semantic similarity between the query and memory embeddings,
      20% recency (decays with age; 0.5 at 30 days old), and
      20% access frequency (saturates at 10 retrievals).
    """
    # Relevance: cosine similarity in embedding space.
    relevance = cosine_similarity(
        embed(query),
        memory.embedding,
    )

    # Recency: 1.0 for a brand-new memory, falling off with age.
    age_days = (now() - memory.created_at).days
    recency = 1.0 / (1.0 + age_days / 30)

    # Usage: capped so very popular memories don't dominate.
    usage = min(memory.access_count / 10, 1.0)

    return relevance * 0.6 + recency * 0.2 + usage * 0.2
-- User memories: one row per extracted or explicitly-added memory,
-- scoped to an account.
CREATE TABLE user_memory (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    account_id UUID NOT NULL REFERENCES accounts(account_id),
    -- Content
    content TEXT NOT NULL,
    type VARCHAR(50), -- preference, fact, event, instruction
    -- Vector embedding (pgvector, 1536 dimensions)
    embedding vector(1536),
    -- Metadata
    metadata JSONB DEFAULT '{}',
    confidence FLOAT DEFAULT 1.0, -- extraction confidence, 0.0-1.0
    tags TEXT[],
    -- Tracking (access stats feed ranking and cleanup)
    access_count INTEGER DEFAULT 0,
    last_accessed_at TIMESTAMPTZ,
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
);

-- Indexes
CREATE INDEX idx_memory_account ON user_memory(account_id);
-- ANN index for cosine-similarity search.
-- NOTE(review): ivfflat is created with its default `lists` setting and
-- builds best on a populated table — confirm/tune for expected row counts.
CREATE INDEX idx_memory_embedding ON user_memory
USING ivfflat (embedding vector_cosine_ops);
CREATE INDEX idx_memory_tags ON user_memory USING gin(tags);
memory:
  # Enable auto-extraction of memories from conversations
  auto_extract: true

  # Extraction settings
  extraction:
    model: "gpt-4o-mini"        # LLM used for extraction
    min_confidence: 0.7         # minimum confidence for an extracted memory
    max_per_conversation: 5     # cap on memories taken from one conversation

  # Search settings
  search:
    default_limit: 10           # default number of results returned
    similarity_threshold: 0.65  # minimum similarity for a search hit
    include_agent_memory: true  # also search agent-level memories

  # Cleanup settings
  cleanup:
    max_memories_per_user: 1000 # hard cap per user
    decay_after_days: 90        # age threshold for cleanup eligibility
    min_access_to_keep: 2       # accesses needed to survive cleanup
Next: A2A Protocol →