Recommendation Systems
Build scalable, real-time recommendation engines using graph traversal, ML embeddings, and vector similarity search.
Problem Statement
Recommendation systems predict user preferences based on:
- Collaborative filtering: Users with similar tastes like similar items
- Content-based: Items with similar features are recommended
- Graph-based: Relationships reveal preferences (friends’ likes, purchase patterns)
Graph databases excel at traversing relationships and computing similarity.
Graph Model
CREATE GRAPH RecommendationEngine;
USE RecommendationEngine;
-- Nodes
(:User {
id: UUID,
name: String,
demographics: JSONB,
embedding: VectorF32 -- User behavior embedding
})
(:Item {
id: UUID,
name: String,
category: String,
price: Decimal,
features: JSONB,
embedding: VectorF32 -- Item feature embedding
})
(:Category {
name: String
})
-- Relationships
(:User)-[:VIEWED {timestamp: Timestamp}]->(:Item)
(:User)-[:PURCHASED {timestamp: Timestamp, rating: Int}]->(:Item)
(:User)-[:RATED {score: Float, timestamp: Timestamp}]->(:Item)
(:User)-[:FOLLOWS]->(:User)
(:Item)-[:IN_CATEGORY]->(:Category)
(:Item)-[:SIMILAR_TO {score: Float}]->(:Item)
Recommendation Strategies
1. Collaborative Filtering (User-User)
Recommend items liked by similar users:
-- Find users similar to current user and their recommendations
-- Step 1: rank other users by the number of purchased items they share with
-- $user_id and keep the 10 with the largest overlap.
MATCH (me:User {id: $user_id})-[:PURCHASED]->(item:Item)<-[:PURCHASED]-(similar:User)
WITH similar, COUNT(item) AS common_purchases
ORDER BY common_purchases DESC LIMIT 10
-- Step 2: collect what those users purchased. `me` is not projected by the
-- WITH above, so the exclusion subquery looks the user up again by id.
MATCH (similar)-[:PURCHASED]->(recommended:Item)
WHERE NOT EXISTS { MATCH (me:User {id: $user_id})-[:PURCHASED]->(recommended) }
-- Step 3: score each candidate by how many matched rows from the similar
-- users point at it, and return the 20 strongest.
WITH recommended, COUNT(similar) AS recommendation_strength
RETURN recommended.name, recommendation_strength
ORDER BY recommendation_strength DESC LIMIT 20;
// getCollaborativeRecommendations returns up to 20 items purchased by the ten
// users whose purchases overlap most with userID's, excluding items userID has
// already purchased. Results are ordered by strength, descending.
//
// The user id is bound twice because the query references it in both the
// initial MATCH and the NOT EXISTS subquery.
//
// Any error from running the query, scanning a row, or iterating the result
// set is returned; no partial result is returned alongside an error.
func getCollaborativeRecommendations(ctx context.Context, db *sql.DB, userID string) ([]Recommendation, error) {
	rows, err := db.QueryContext(ctx, `
MATCH (me:User {id: ?})-[:PURCHASED]->(item:Item)<-[:PURCHASED]-(similar:User)
WITH similar, COUNT(item) AS common ORDER BY common DESC LIMIT 10
MATCH (similar)-[:PURCHASED]->(rec:Item)
WHERE NOT EXISTS { MATCH (me:User {id: ?})-[:PURCHASED]->(rec) }
WITH rec, COUNT(similar) AS strength
RETURN rec.name, rec.category, strength
ORDER BY strength DESC LIMIT 20
`, userID, userID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var recs []Recommendation
	for rows.Next() {
		var r Recommendation
		// A failed Scan would otherwise append a zero-valued or half-filled row.
		if err := rows.Scan(&r.Name, &r.Category, &r.Score); err != nil {
			return nil, err
		}
		recs = append(recs, r)
	}
	// rows.Next returns false both at end of data and on error; distinguish them.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return recs, nil
}
async def get_collaborative_recommendations(conn, user_id: str, limit: int = 20):
    """Return items bought by the users most similar to ``user_id``.

    The query ranks other users by the number of purchased items they share
    with ``user_id``, keeps the top 10, and returns the items those users
    purchased that ``user_id`` has not, strongest first.

    Args:
        conn: Connection object exposing an awaitable ``query(text, params)``
            that returns a ``(page, ...)`` pair whose ``page.rows`` are
            mappings keyed by the RETURN column names.
        user_id: Id of the user to recommend for.
        limit: Maximum number of recommendations to return.

    Returns:
        A list of dicts with ``name``, ``category`` and ``strength`` keys.
    """
    page, _ = await conn.query("""
MATCH (me:User {id: $user_id})-[:PURCHASED]->(item:Item)<-[:PURCHASED]-(similar:User)
WITH similar, COUNT(item) AS common ORDER BY common DESC LIMIT 10
MATCH (similar)-[:PURCHASED]->(rec:Item)
WHERE NOT EXISTS { MATCH (me:User {id: $user_id})-[:PURCHASED]->(rec) }
WITH rec, COUNT(similar) AS strength
RETURN rec.name, rec.category, strength
ORDER BY strength DESC LIMIT $limit
""", {"user_id": user_id, "limit": limit})
    return [
        {
            "name": row["rec.name"].as_string,
            # The query already returns the category; expose it like the
            # other language examples do instead of discarding it.
            "category": row["rec.category"].as_string,
            "strength": row["strength"].as_int,
        }
        for row in page.rows
    ]
/// Returns up to 20 items purchased by the ten users whose purchases overlap
/// most with `user_id`'s, excluding items `user_id` has already purchased.
///
/// # Errors
///
/// Returns the error produced by `query_with_params` if the query fails.
///
/// # Panics
///
/// Panics if a result row lacks one of the `rec.name`, `rec.category` or
/// `strength` columns, or if a column cannot be read as the expected type.
async fn get_collaborative_recommendations(conn: &mut Connection, user_id: &str) -> Result<Vec<Recommendation>> {
    let params = [("user_id", Value::string(user_id))].into();
    let (page, _) = conn.query_with_params(r#"
MATCH (me:User {id: $user_id})-[:PURCHASED]->(item:Item)<-[:PURCHASED]-(similar:User)
WITH similar, COUNT(item) AS common ORDER BY common DESC LIMIT 10
MATCH (similar)-[:PURCHASED]->(rec:Item)
WHERE NOT EXISTS { MATCH (me:User {id: $user_id})-[:PURCHASED]->(rec) }
WITH rec, COUNT(similar) AS strength
RETURN rec.name, rec.category, strength ORDER BY strength DESC LIMIT 20
"#, &params).await?;
    Ok(page.rows.iter().map(|row| Recommendation {
        // The RETURN clause above fixes these column names, so a missing
        // column is a bug in this function rather than a runtime condition.
        name: row
            .get("rec.name")
            .expect("query returns a rec.name column")
            .as_string()
            .expect("rec.name is a string"),
        category: row
            .get("rec.category")
            .expect("query returns a rec.category column")
            .as_string()
            .expect("rec.category is a string"),
        // COUNT yields an integer; Recommendation stores the score as f64.
        score: row
            .get("strength")
            .expect("query returns a strength column")
            .as_int()
            .expect("strength is an integer") as f64,
    }).collect())
}
/**
 * Fetch collaborative-filtering recommendations for one user.
 *
 * Runs the user-user query through the shared `client` and maps each result
 * row to a plain `{ name, category, strength }` object.
 *
 * @param userId Id of the user to recommend for.
 * @param limit  Maximum number of recommendations to return (default 20).
 */
async function getCollaborativeRecommendations(userId: string, limit = 20) {
  const query = `
MATCH (me:User {id: $userId})-[:PURCHASED]->(item:Item)<-[:PURCHASED]-(similar:User)
WITH similar, COUNT(item) AS common ORDER BY common DESC LIMIT 10
MATCH (similar)-[:PURCHASED]->(rec:Item)
WHERE NOT EXISTS { MATCH (me:User {id: $userId})-[:PURCHASED]->(rec) }
WITH rec, COUNT(similar) AS strength
RETURN rec.name, rec.category, strength ORDER BY strength DESC LIMIT $limit
`;
  const options = { params: { userId, limit } };
  const records = await client.queryAll(query, options);

  return records.map((record) => {
    // Optional chaining leaves a field undefined when its column is absent.
    const name = record.get('rec.name')?.asString;
    const category = record.get('rec.category')?.asString;
    const strength = record.get('strength')?.asNumber;
    return { name, category, strength };
  });
}
/// Runs the user-user collaborative filtering query for `user_id` and returns
/// whatever `parseRecommendations` builds from the response.
///
/// Errors from building the parameter map, sending either request, or
/// receiving either response are propagated to the caller via `try`.
fn getCollaborativeRecommendations(client: *GeodeClient, allocator: std.mem.Allocator, user_id: []const u8) ![]Recommendation {
// Build params JSON object
var params = std.json.ObjectMap.init(allocator);
defer params.deinit();
try params.put("user_id", .{ .string = user_id });
// Multi-line string literal; $user_id is bound from `params` above.
const query =
\\MATCH (me:User {id: $user_id})-[:PURCHASED]->(item:Item)<-[:PURCHASED]-(similar:User)
\\WITH similar, COUNT(item) AS common ORDER BY common DESC LIMIT 10
\\MATCH (similar)-[:PURCHASED]->(rec:Item)
\\WHERE NOT EXISTS { MATCH (me:User {id: $user_id})-[:PURCHASED]->(rec) }
\\WITH rec, COUNT(similar) AS strength
\\RETURN rec.name, strength ORDER BY strength DESC LIMIT 20
;
// Send the query, then read the first response. NOTE(review): the leading 1
// looks like a request/stream id and 30000 a timeout in ms -- confirm
// against the GeodeClient API.
try client.sendRunGql(1, query, .{ .object = params });
const schema = try client.receiveMessage(30000);
// The first response is not inspected here; release it immediately.
allocator.free(schema);
// Request the rows. NOTE(review): 1000 is presumably the maximum number of
// rows to pull -- confirm.
try client.sendPull(1, 1000);
const result = try client.receiveMessage(30000);
// `result` is freed when this function returns, so parseRecommendations must
// not return slices that alias it -- TODO confirm it copies.
defer allocator.free(result);
return parseRecommendations(allocator, result);
}
2. Collaborative Filtering (Item-Item)
Recommend items similar to user’s purchases:
-- Find items similar to what user has purchased
-- For every item the user bought, find other users who bought the same item
-- and the further items those users purchased.
MATCH (me:User {id: $user_id})-[:PURCHASED]->(purchased:Item)
MATCH (purchased)<-[:PURCHASED]-(other:User)-[:PURCHASED]->(recommended:Item)
-- Skip anything the user already owns; `me` is still in scope here.
WHERE NOT EXISTS {
MATCH (me)-[:PURCHASED]->(recommended)
}
-- DISTINCT counts each co-purchasing user once per candidate, even when that
-- user overlaps with the current user on several items.
WITH recommended, COUNT(DISTINCT other) AS co_purchase_count
RETURN recommended.name, co_purchase_count
ORDER BY co_purchase_count DESC
LIMIT 20;
3. Social Recommendations
Recommend based on what friends like:
-- Items purchased by friends
-- "Friends" are the users that $user_id FOLLOWS (one hop, outgoing direction).
MATCH (me:User {id: $user_id})-[:FOLLOWS]->(friend:User)-[:PURCHASED]->(item:Item)
-- Exclude items the user has already purchased.
WHERE NOT EXISTS {
MATCH (me)-[:PURCHASED]->(item)
}
-- Group by item: keep the names of the friends who bought it and rank by how
-- many matched friend rows there are. COUNT is not DISTINCT, so a friend with
-- several PURCHASED edges to the same item is counted once per edge.
WITH item, COLLECT(friend.name) AS friends_who_bought, COUNT(friend) AS friend_count
RETURN item.name, friends_who_bought, friend_count
ORDER BY friend_count DESC
LIMIT 20;
4. Content-Based (Category Preferences)
Recommend items in categories the user likes:
-- Find user's preferred categories
-- Rank categories by the number of the user's purchases in each; keep the top 3.
MATCH (me:User {id: $user_id})-[:PURCHASED]->(item:Item)-[:IN_CATEGORY]->(cat:Category)
WITH cat, COUNT(item) AS purchase_count
ORDER BY purchase_count DESC
LIMIT 3
-- Recommend popular items in preferred categories
MATCH (cat)<-[:IN_CATEGORY]-(recommended:Item)
-- `me` was not carried through the WITH above, so the exclusion subquery
-- looks the user up again by id.
WHERE NOT EXISTS {
MATCH (me:User {id: $user_id})-[:PURCHASED]->(recommended)
}
-- Popularity is the number of PURCHASED matches from any user for the item.
WITH recommended, COUNT {MATCH (:User)-[:PURCHASED]->(recommended)} AS popularity
RETURN recommended.name, recommended.category, popularity
ORDER BY popularity DESC
LIMIT 20;
Embedding-Based Recommendations
Generate Item Embeddings
Use Node2Vec to create item embeddings based on co-purchase patterns:
-- Generate item embeddings
-- Runs node2vec over the PURCHASED relationships of the RecommendationEngine
-- graph, producing one 128-dimensional embedding per yielded node.
-- NOTE(review): walk_length / num_walks / p / q are passed straight to the
-- procedure; see the graph algorithms reference for their exact meaning.
CALL graph.node2vec('RecommendationEngine', {
relationship_type: 'PURCHASED',
dimensions: 128,
walk_length: 80,
num_walks: 10,
p: 1.0,
q: 1.0
})
YIELD node, embedding
WITH node, embedding
-- Join the yielded nodes back to Item nodes by id; embeddings for yielded
-- nodes that have no Item with the same id are not stored by this statement.
MATCH (item:Item)
WHERE item.id = node.id
SET item.embedding = embedding;
Create Vector Index
CREATE INDEX item_embedding_idx ON Item(embedding) USING vector;
Similarity-Based Recommendations
-- Find items similar to user's recent purchases
MATCH (me:User {id: $user_id})-[p:PURCHASED]->(item:Item)
WHERE p.timestamp > timestamp() - interval('P30D') -- Last 30 days
WITH COLLECT(item.embedding) AS recent_embeddings
-- Compute average embedding (user preference vector)
-- The comprehension below copies the list unchanged; it only renames it.
WITH [e IN recent_embeddings | e] AS embeddings
CALL vector.average(embeddings) YIELD result AS user_embedding
-- NOTE(review): behavior when the user has no purchases in the window (empty
-- list passed to vector.average) is not shown here -- confirm.
-- Find similar items
MATCH (recommended:Item)
-- `me` is no longer in scope after the WITH clauses, so the exclusion
-- subquery looks the user up again by id.
WHERE NOT EXISTS {
MATCH (me:User {id: $user_id})-[:PURCHASED]->(recommended)
}
-- Keep only candidates whose cosine distance to the preference vector is below 0.5.
AND vector_distance_cosine(recommended.embedding, user_embedding) < 0.5
RETURN
recommended.name,
recommended.category,
-- Despite the alias, this column holds a cosine *distance*: smaller means
-- closer, which is why the result is ordered ascending.
vector_distance_cosine(recommended.embedding, user_embedding) AS similarity
ORDER BY similarity ASC
LIMIT 20;
Hybrid Recommendation System
Combine multiple strategies with weighted scoring:
-- Hybrid score = 0.5 * collaborative + 0.3 * social + 0.2 * embedding closeness.
-- `me` is projected through every WITH so that each later clause refers to
-- the same user; a variable that is not projected goes out of scope, and a
-- bare (me) pattern would then bind a new variable matching any user.
-- Collaborative filtering score
MATCH (me:User {id: $user_id})-[:PURCHASED]->(item:Item)<-[:PURCHASED]-(similar:User)-[:PURCHASED]->(recommended:Item)
WHERE NOT EXISTS {
  MATCH (me)-[:PURCHASED]->(recommended)
}
WITH me, recommended, COUNT(DISTINCT similar) AS collab_score
-- Social score: how many followed users purchased the candidate (0 if none)
OPTIONAL MATCH (me)-[:FOLLOWS]->(friend:User)-[:PURCHASED]->(recommended)
WITH me, recommended, collab_score, COUNT(DISTINCT friend) AS social_score
-- Similarity score (requires embeddings): mean cosine distance between the
-- candidate and each item the user has purchased
OPTIONAL MATCH (me)-[:PURCHASED]->(purchased:Item)
WITH recommended, collab_score, social_score,
  AVG(vector_distance_cosine(purchased.embedding, recommended.embedding)) AS avg_similarity
-- Weighted final score
-- avg_similarity is a distance, so (1 - distance) turns it into a closeness.
-- When no distance could be computed (e.g. embeddings not generated yet) it is
-- treated as 1.0, so the embedding term contributes 0 instead of turning the
-- whole score into NULL.
WITH recommended,
  (collab_score * 0.5) + (social_score * 0.3) + ((1 - COALESCE(avg_similarity, 1.0)) * 0.2) AS final_score
RETURN recommended.name, final_score
ORDER BY final_score DESC
LIMIT 20;
Real-Time Recommendations
Update Embeddings on New Purchases
Use change data capture (CDC) webhooks to trigger embedding updates:
# cdc-config.yaml
cdc:
enabled: true
webhooks:
- url: "https://recommendation-service.example.com/update-embeddings"
events: ["edge.created"]
filter: "type = 'PURCHASED'"
Incremental Embedding Updates
Instead of full recomputation, update embeddings incrementally:
# Pseudo-code for webhook handler
async def on_purchase(event):
    """Nudge a user's embedding toward the item they just purchased.

    ``event`` is the webhook payload; its ``edge`` entry carries the user id
    in ``from_node`` and the item id in ``to_node``. The user's stored
    embedding is replaced by a blend of its current value and the item's
    embedding.
    """
    edge = event['edge']
    buyer_id = edge['from_node']
    bought_item_id = edge['to_node']

    # Load both embeddings: user first, then item.
    current_user_vec = await get_user_embedding(buyer_id)
    item_vec = await get_item_embedding(bought_item_id)

    # Blend: keep (1 - alpha) of the old vector and mix in alpha of the item.
    alpha = 0.1  # Learning rate
    blended_vec = (1 - alpha) * current_user_vec + alpha * item_vec

    # Write the blended vector back as the user's embedding.
    await update_user_embedding(buyer_id, blended_vec)
Performance Optimization
Index Strategy
-- Index for user lookups
CREATE INDEX user_id_idx ON User(id) USING hash;
-- Index for temporal queries
CREATE INDEX purchase_timestamp_idx ON PURCHASED(timestamp) USING btree;
-- Vector index for similarity
CREATE INDEX item_embedding_idx ON Item(embedding) USING vector;
CREATE INDEX user_embedding_idx ON User(embedding) USING vector;
-- Category index
CREATE INDEX item_category_idx ON Item(category) USING btree;
Materialized Recommendations
Pre-compute recommendations for active users:
-- Batch job (run nightly)
-- Only users with at least one purchase are processed.
MATCH (user:User)
WHERE EXISTS {MATCH (user)-[:PURCHASED]->(:Item)}
-- Compute top 50 recommendations
CALL recommend.hybrid(user.id, {limit: 50}) YIELD item, score
-- Store in user property
-- Each entry keeps only the item id and its score; the list is cast to JSONB
-- before being written to the user node.
WITH user, COLLECT({item_id: item.id, score: score}) AS recommendations
SET user.cached_recommendations = recommendations::jsonb;
-- Real-time query uses cache
-- Serving a request is a single node lookup by id; no traversal is performed.
MATCH (user:User {id: $user_id})
RETURN user.cached_recommendations;
Evaluation Metrics
Precision and Recall
-- Precision@20: fraction of the top 20 cached recommendations that the user
-- went on to purchase (0.0 to 1.0)
MATCH (user:User {id: $user_id})
WITH user, user.cached_recommendations AS recommendations
MATCH (user)-[:PURCHASED]->(purchased:Item)
WHERE purchased.id IN [r IN recommendations | r.item_id][0..20] -- Top 20
-- DISTINCT: an item bought more than once must count as a single hit,
-- otherwise the result could exceed 1.0.
WITH COUNT(DISTINCT purchased) AS hits, 20 AS k
-- Multiply by 1.0 to force floating-point division; integer / integer would
-- truncate every value below 1 to 0 where integer division applies.
RETURN hits * 1.0 / k AS precision_at_20;
Click-Through Rate (CTR)
Track recommendation clicks via CDC:
cdc:
webhooks:
- url: "https://analytics.example.com/track-click"
events: ["edge.created"]
filter: "type = 'VIEWED'"
Complete Example
-- Seed data
CREATE
(:User {id: gen_random_uuid(), name: "Alice"}),
(:User {id: gen_random_uuid(), name: "Bob"}),
(:User {id: gen_random_uuid(), name: "Charlie"});
CREATE
(:Item {id: gen_random_uuid(), name: "Laptop", category: "Electronics"}),
(:Item {id: gen_random_uuid(), name: "Mouse", category: "Electronics"}),
(:Item {id: gen_random_uuid(), name: "Desk Chair", category: "Furniture"}),
(:Item {id: gen_random_uuid(), name: "Monitor", category: "Electronics"});
-- Alice and Bob purchase Laptop
MATCH (alice:User {name: "Alice"}), (laptop:Item {name: "Laptop"})
CREATE (alice)-[:PURCHASED {timestamp: timestamp()}]->(laptop);
MATCH (bob:User {name: "Bob"}), (laptop:Item {name: "Laptop"})
CREATE (bob)-[:PURCHASED {timestamp: timestamp()}]->(laptop);
-- Bob also purchases Mouse
MATCH (bob:User {name: "Bob"}), (mouse:Item {name: "Mouse"})
CREATE (bob)-[:PURCHASED {timestamp: timestamp()}]->(mouse);
-- Recommend Mouse to Alice (collaborative filtering)
MATCH (alice:User {name: "Alice"})-[:PURCHASED]->(item:Item)<-[:PURCHASED]-(similar:User)-[:PURCHASED]->(recommended:Item)
WHERE NOT EXISTS {
MATCH (alice)-[:PURCHASED]->(recommended)
}
WITH recommended, COUNT(similar) AS score
RETURN recommended.name, score
ORDER BY score DESC;
-- Returns: Mouse (score: 1)
Next Steps
- Graph Algorithms - Embedding generation
- Indexing and Optimization - Vector indexes
- Real-Time Analytics - CDC integration
- Data Model and Types - Vector types