Knowledge Graph Guide
This guide demonstrates how to build and query knowledge graphs using Geode. You’ll learn ontology design, entity-relationship modeling, inference patterns, and integration with NLP systems.
Overview
Knowledge graphs represent real-world entities and their relationships in a structured format that machines can process. They power:
- Semantic search - Find answers, not just documents
- Question answering - Natural language queries over structured data
- Data integration - Connect disparate data sources
- Recommendation - Context-aware suggestions
- AI/ML features - Graph embeddings and reasoning
Geode’s property graph model is ideal for knowledge graphs because it naturally represents entities as nodes and relationships as edges with rich attributes.
Ontology Design
Core Concepts
An ontology defines the types of entities and relationships in your knowledge graph.
// Entity types (classes)
// Base type: the properties listed here are shared by every entity node.
(:Entity {
id: STRING,
name: STRING,
description: STRING,
aliases: LIST<STRING>,
source: STRING,
confidence: FLOAT,
created_at: TIMESTAMP,
updated_at: TIMESTAMP
})
// Specific entity types (subclasses)
// Each subtype carries the :Entity label in addition to its own label,
// and lists only the properties it adds.
(:Person:Entity {
birth_date: DATE,
death_date: DATE,
nationality: STRING,
occupation: LIST<STRING>,
gender: STRING
})
(:Organization:Entity {
founded_date: DATE,
dissolved_date: DATE,
org_type: STRING, // "company", "nonprofit", "government"
industry: STRING,
headquarters: STRING
})
(:Location:Entity {
coordinates: POINT,
location_type: STRING, // "city", "country", "building"
population: INTEGER,
area_km2: FLOAT
})
(:Concept:Entity {
definition: STRING,
domain: STRING
})
(:Event:Entity {
start_date: DATE,
end_date: DATE,
event_type: STRING
})
(:Product:Entity {
release_date: DATE,
product_type: STRING,
manufacturer: STRING
})
Relationship Types
// Person relationships
// Properties in braces are stored on the relationship itself.
(:Person)-[:BORN_IN {date: DATE}]->(:Location)
(:Person)-[:DIED_IN {date: DATE}]->(:Location)
(:Person)-[:WORKS_FOR {start_date: DATE, end_date: DATE, role: STRING}]->(:Organization)
(:Person)-[:FOUNDED {date: DATE}]->(:Organization)
(:Person)-[:EDUCATED_AT {degree: STRING, field: STRING, year: INTEGER}]->(:Organization)
(:Person)-[:MARRIED_TO {start_date: DATE, end_date: DATE}]->(:Person)
(:Person)-[:PARENT_OF]->(:Person)
(:Person)-[:KNOWS]->(:Person)
// NOTE(review): :CreativeWork is not among the entity types declared above — confirm it is defined elsewhere.
(:Person)-[:AUTHORED]->(:CreativeWork)
(:Person)-[:PARTICIPATED_IN {role: STRING}]->(:Event)
// Organization relationships
(:Organization)-[:HEADQUARTERED_IN]->(:Location)
(:Organization)-[:SUBSIDIARY_OF]->(:Organization)
(:Organization)-[:PARTNER_OF]->(:Organization)
(:Organization)-[:EMPLOYS {count: INTEGER}]->(:Person)
(:Organization)-[:PRODUCES]->(:Product)
(:Organization)-[:PARTICIPATED_IN {role: STRING}]->(:Event)
// Location relationships
// LOCATED_IN nests locations inside other locations (e.g. a place inside a country).
(:Location)-[:LOCATED_IN]->(:Location)
(:Location)-[:BORDERS]->(:Location)
(:Location)-[:CAPITAL_OF]->(:Location)
// Concept relationships
(:Concept)-[:SUBCLASS_OF]->(:Concept)
(:Concept)-[:RELATED_TO]->(:Concept)
// Any entity can be typed by a concept.
(:Entity)-[:INSTANCE_OF]->(:Concept)
// Event relationships
(:Event)-[:OCCURRED_IN]->(:Location)
(:Event)-[:CAUSED]->(:Event)
(:Event)-[:PRECEDED]->(:Event)
Schema Setup
// Constraints
// Every :Entity node must have a unique id.
CREATE CONSTRAINT entity_id_unique ON :Entity(id) ASSERT UNIQUE
// Indexes for search
// One index on the shared :Entity name/aliases, plus a name index per subtype label.
CREATE INDEX entity_name ON :Entity(name)
CREATE INDEX entity_aliases ON :Entity(aliases)
CREATE INDEX person_name ON :Person(name)
CREATE INDEX organization_name ON :Organization(name)
CREATE INDEX location_name ON :Location(name)
CREATE INDEX concept_name ON :Concept(name)
// Full-text search index
// Covers the three textual properties of :Entity in a single index.
CREATE FULLTEXT INDEX entity_search ON :Entity(name, description, aliases)
Entity-Relationship Modeling
Creating Entities
// Create a person entity
// All values are supplied as query parameters ($id, $name, ...).
// $birth_date is passed through date(); created_at is taken from timestamp().
CREATE (p:Person:Entity {
id: $id,
name: $name,
description: $description,
aliases: $aliases,
birth_date: date($birth_date),
nationality: $nationality,
occupation: $occupation,
source: $source,
confidence: $confidence,
created_at: timestamp()
})
RETURN p
package main
import (
"context"
"database/sql"
"log"
"github.com/google/uuid"
_ "geodedb.com/geode"
)
// Entity holds the properties shared by every knowledge-graph node.
type Entity struct {
ID string // unique identifier
Name string
Description string
Aliases []string // alternative names for the same entity
Source string // where the fact came from (the example uses "wikipedia")
Confidence float64 // the example uses 0.99; presumably a 0–1 score — confirm
}
// Person is an Entity with person-specific properties.
type Person struct {
Entity
BirthDate string // passed to date(?) in the insert query; the example uses "1867-11-07"
Nationality string
Occupation []string
}
// CreatePerson inserts a new :Person:Entity node and returns its generated ID.
//
// A fresh UUID is always assigned; any ID already present on person is
// ignored. Because person is passed by value, the caller's struct is not
// modified. On failure the returned ID is empty, so a caller cannot mistake
// it for the ID of a stored entity.
func CreatePerson(ctx context.Context, db *sql.DB, person Person) (string, error) {
	person.ID = uuid.New().String()

	// Placeholders are positional: the argument order below must match the
	// order of the ? markers in the statement.
	_, err := db.ExecContext(ctx, `
CREATE (p:Person:Entity {
id: ?,
name: ?,
description: ?,
aliases: ?,
birth_date: date(?),
nationality: ?,
occupation: ?,
source: ?,
confidence: ?,
created_at: timestamp()
})
`, person.ID, person.Name, person.Description, person.Aliases,
		person.BirthDate, person.Nationality, person.Occupation,
		person.Source, person.Confidence)
	if err != nil {
		return "", err
	}
	return person.ID, nil
}
func main() {
db, err := sql.Open("geode", "localhost:3141")
if err != nil {
log.Fatal(err)
}
defer db.Close()
ctx := context.Background()
id, err := CreatePerson(ctx, db, Person{
Entity: Entity{
Name: "Marie Curie",
Description: "Polish-French physicist and chemist",
Aliases: []string{"Maria Sklodowska", "Madame Curie"},
Source: "wikipedia",
Confidence: 0.99,
},
BirthDate: "1867-11-07",
Nationality: "Polish-French",
Occupation: []string{"physicist", "chemist"},
})
if err != nil {
log.Fatal(err)
}
log.Printf("Created person: %s", id)
}
import asyncio
from dataclasses import dataclass, field
from typing import List, Optional
from datetime import date
from uuid import uuid4
from geode_client import Client
@dataclass
class Entity:
    """Properties shared by every knowledge-graph entity.

    All fields have defaults, so an entity can be built from keyword
    arguments alone.
    """

    id: str = ""
    name: str = ""
    description: str = ""
    # default_factory gives every instance its own list.
    aliases: List[str] = field(default_factory=list)
    source: str = ""
    confidence: float = 1.0


@dataclass
class Person(Entity):
    """An :class:`Entity` with person-specific properties."""

    birth_date: Optional[date] = None
    death_date: Optional[date] = None
    nationality: str = ""
    occupation: List[str] = field(default_factory=list)
async def create_person(client, person: Person) -> str:
    """Create a person entity in the knowledge graph.

    A fresh UUID is generated and written to ``person.id`` (any existing id
    is overwritten) before the node is inserted.

    Args:
        client: Geode client; must provide ``connection()`` as an async
            context manager yielding an object with ``execute``.
        person: The person to store. ``birth_date`` may be ``None``, in
            which case ``None`` is sent as the ``$birth_date`` parameter.

    Returns:
        The generated id of the new person.
    """
    person.id = str(uuid4())

    async with client.connection() as conn:
        await conn.execute("""
CREATE (p:Person:Entity {
id: $id,
name: $name,
description: $description,
aliases: $aliases,
birth_date: date($birth_date),
nationality: $nationality,
occupation: $occupation,
source: $source,
confidence: $confidence,
created_at: timestamp()
})
""", {
            "id": person.id,
            "name": person.name,
            "description": person.description,
            "aliases": person.aliases,
            # Dates travel as ISO-8601 strings and are parsed by date() in the query.
            "birth_date": person.birth_date.isoformat() if person.birth_date else None,
            "nationality": person.nationality,
            "occupation": person.occupation,
            "source": person.source,
            "confidence": person.confidence
        })

    return person.id
async def main():
    """Store a sample person on a local Geode server and print its id."""
    client = Client(host="localhost", port=3141, skip_verify=True)

    person_id = await create_person(client, Person(
        name="Marie Curie",
        description="Polish-French physicist and chemist",
        aliases=["Maria Sklodowska", "Madame Curie"],
        birth_date=date(1867, 11, 7),
        nationality="Polish-French",
        occupation=["physicist", "chemist"],
        source="wikipedia",
        confidence=0.99
    ))
    print(f"Created person: {person_id}")


# Only run the demo when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())
use geode_client::{Client, Value};
use std::collections::HashMap;
use uuid::Uuid;
/// A person to be stored as a `:Person:Entity` node.
#[derive(Debug)]
struct Person {
/// Unique identifier; `create_person` overwrites it with a fresh UUID.
id: String,
name: String,
description: String,
/// Alternative names for the same person.
aliases: Vec<String>,
/// Sent as a string and converted with `date()` in the insert query.
birth_date: String,
nationality: String,
occupation: Vec<String>,
source: String,
confidence: f64,
}
/// Inserts `person` as a `:Person:Entity` node and returns its generated id.
///
/// A fresh UUID is written to `person.id` (overwriting any existing value)
/// before the insert is sent. Errors from the query are propagated.
async fn create_person(
    conn: &mut geode_client::Connection,
    person: &mut Person,
) -> Result<String, Box<dyn std::error::Error>> {
    person.id = Uuid::new_v4().to_string();

    // One entry per `$name` placeholder used in the statement below.
    let mut params = HashMap::new();
    params.insert("id".to_string(), Value::string(&person.id));
    params.insert("name".to_string(), Value::string(&person.name));
    params.insert("description".to_string(), Value::string(&person.description));
    params.insert("aliases".to_string(), Value::list(
        person.aliases.iter().map(|a| Value::string(a)).collect()
    ));
    params.insert("birth_date".to_string(), Value::string(&person.birth_date));
    params.insert("nationality".to_string(), Value::string(&person.nationality));
    params.insert("occupation".to_string(), Value::list(
        person.occupation.iter().map(|o| Value::string(o)).collect()
    ));
    params.insert("source".to_string(), Value::string(&person.source));
    params.insert("confidence".to_string(), Value::float(person.confidence));

    // The parameter map is passed by reference (`&params`).
    conn.query_with_params(r#"
CREATE (p:Person:Entity {
id: $id,
name: $name,
description: $description,
aliases: $aliases,
birth_date: date($birth_date),
nationality: $nationality,
occupation: $occupation,
source: $source,
confidence: $confidence,
created_at: timestamp()
})
"#, &params).await?;

    Ok(person.id.clone())
}
/// Connects to a local Geode server, stores a sample person and prints its id.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let geode = Client::new("127.0.0.1", 3141).skip_verify(true);
    let mut connection = geode.connect().await?;

    // Sample record; `id` starts empty and is filled in by `create_person`.
    let mut curie = Person {
        id: String::new(),
        name: String::from("Marie Curie"),
        description: String::from("Polish-French physicist and chemist"),
        aliases: vec![String::from("Maria Sklodowska"), String::from("Madame Curie")],
        birth_date: String::from("1867-11-07"),
        nationality: String::from("Polish-French"),
        occupation: vec![String::from("physicist"), String::from("chemist")],
        source: String::from("wikipedia"),
        confidence: 0.99,
    };

    let new_id = create_person(&mut connection, &mut curie).await?;
    println!("Created person: {}", new_id);

    Ok(())
}
import { createClient, Client } from '@geodedb/client';
import { v4 as uuidv4 } from 'uuid';
/** Properties shared by every knowledge-graph entity. */
interface Entity {
id: string;
name: string;
description: string;
/** Alternative names for the same entity. */
aliases: string[];
source: string;
confidence: number;
}
/** An Entity with person-specific properties. */
interface Person extends Entity {
/** Sent as the $birth_date parameter; the example uses '1867-11-07'. */
birthDate?: string;
deathDate?: string;
nationality: string;
occupation: string[];
}
/**
 * Creates a `:Person:Entity` node and returns its generated id.
 *
 * A fresh UUID is always generated; any `id` on `person` is ignored.
 * Missing `aliases` / `occupation` default to empty lists and a missing
 * `confidence` defaults to 1.0.
 *
 * @param client - Connected Geode client.
 * @param person - Person properties; all fields are optional.
 * @returns The id of the new person.
 */
async function createPerson(client: Client, person: Partial<Person>): Promise<string> {
  const id = uuidv4();

  await client.exec(`
CREATE (p:Person:Entity {
id: $id,
name: $name,
description: $description,
aliases: $aliases,
birth_date: date($birth_date),
nationality: $nationality,
occupation: $occupation,
source: $source,
confidence: $confidence,
created_at: timestamp()
})
`, {
    params: {
      id,
      name: person.name,
      description: person.description,
      aliases: person.aliases ?? [],
      birth_date: person.birthDate,
      nationality: person.nationality,
      occupation: person.occupation ?? [],
      source: person.source,
      // `??` rather than `||`: an explicit confidence of 0 is a valid value
      // and must not be replaced by the 1.0 default.
      confidence: person.confidence ?? 1.0
    }
  });

  return id;
}
/** Stores a sample person on a local Geode server and logs its id. */
async function main() {
  const client = await createClient('quic://localhost:3141');

  // try/finally so the client is closed even when the insert fails.
  try {
    const personId = await createPerson(client, {
      name: 'Marie Curie',
      description: 'Polish-French physicist and chemist',
      aliases: ['Maria Sklodowska', 'Madame Curie'],
      birthDate: '1867-11-07',
      nationality: 'Polish-French',
      occupation: ['physicist', 'chemist'],
      source: 'wikipedia',
      confidence: 0.99
    });
    console.log(`Created person: ${personId}`);
  } finally {
    await client.close();
  }
}
main();
const std = @import("std");
const geode = @import("geode_client");
const uuid = @import("uuid");
/// A person to be stored as a `:Person:Entity` node.
/// All text fields are byte slices that this struct does not allocate itself.
const Person = struct {
id: []const u8,
name: []const u8,
description: []const u8,
// List of alternative names.
aliases: []const []const u8,
// Sent as text; the query converts it with date().
birth_date: []const u8,
nationality: []const u8,
occupation: []const []const u8,
source: []const u8,
confidence: f64,
};
/// Creates a `:Person:Entity` node for `person` and returns its generated id.
///
/// The returned slice is allocated with `allocator`; the caller owns it and
/// must free it. `person.id` is pointed at the same allocation once the
/// server response has been received.
pub fn createPerson(
    client: *geode.GeodeClient,
    allocator: std.mem.Allocator,
    person: *Person,
) ![]const u8 {
    const id = uuid.v4();
    // `id` lives in this function's stack frame, so a slice of it must not
    // escape. Copy it to the heap before storing or returning it.
    // (Assumes uuid.v4() yields a byte array, as the `&id` slice coercion implies.)
    const owned_id = try allocator.dupe(u8, &id);
    errdefer allocator.free(owned_id);

    var params = std.json.ObjectMap.init(allocator);
    defer params.deinit();

    try params.put("id", .{ .string = owned_id });
    try params.put("name", .{ .string = person.name });
    try params.put("description", .{ .string = person.description });
    // ... add other parameters

    try client.sendRunGql(1,
        \\CREATE (p:Person:Entity {
        \\ id: $id,
        \\ name: $name,
        \\ description: $description,
        \\ aliases: $aliases,
        \\ birth_date: date($birth_date),
        \\ nationality: $nationality,
        \\ occupation: $occupation,
        \\ source: $source,
        \\ confidence: $confidence,
        \\ created_at: timestamp()
        \\})
    , .{ .object = params });

    _ = try client.receiveMessage(30000);

    // Assign only after success so `person.id` never refers to memory that
    // the errdefer above has released.
    person.id = owned_id;
    return owned_id;
}

/// Connects to a local Geode server, stores a sample person and prints its id.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    var client = geode.GeodeClient.init(allocator, "localhost", 3141, true);
    defer client.deinit();

    try client.connect();
    try client.sendHello("knowledge-graph", "1.0.0");
    _ = try client.receiveMessage(30000);

    var person = Person{
        .id = "",
        .name = "Marie Curie",
        .description = "Polish-French physicist and chemist",
        .aliases = &[_][]const u8{ "Maria Sklodowska", "Madame Curie" },
        .birth_date = "1867-11-07",
        .nationality = "Polish-French",
        .occupation = &[_][]const u8{ "physicist", "chemist" },
        .source = "wikipedia",
        .confidence = 0.99,
    };

    const id = try createPerson(&client, allocator, &person);
    // createPerson hands ownership of the id to the caller.
    defer allocator.free(id);
    std.debug.print("Created person: {s}\n", .{id});
}
Creating Relationships
// Link entities with relationships
// Each MATCH ... MATCH ... CREATE group below is a separate statement.
// Both endpoints are looked up by name; the relationship is then created between them.
MATCH (marie:Person {name: "Marie Curie"})
MATCH (sorbonne:Organization {name: "University of Paris"})
CREATE (marie)-[:EDUCATED_AT {
degree: "PhD",
field: "Physics",
year: 1903
}]->(sorbonne)
// Birthplace, with the birth date stored on the relationship.
MATCH (marie:Person {name: "Marie Curie"})
MATCH (poland:Location {name: "Poland"})
CREATE (marie)-[:BORN_IN {date: date('1867-11-07')}]->(poland)
// Marriage, with its start and end dates stored on the relationship.
MATCH (marie:Person {name: "Marie Curie"})
MATCH (pierre:Person {name: "Pierre Curie"})
CREATE (marie)-[:MARRIED_TO {
start_date: date('1895-07-26'),
end_date: date('1906-04-19')
}]->(pierre)
Extracting Entity Relationships
// Find all relationships for an entity: a single query made of two halves joined by UNION.
// First half: outgoing relationships (entity -> related).
MATCH (e:Entity {id: $entity_id})-[r]->(related:Entity)
RETURN
type(r) AS relationship_type,
properties(r) AS relationship_properties,
related.id AS related_id,
related.name AS related_name,
labels(related) AS related_types
ORDER BY relationship_type
// Second half: incoming relationships (related -> entity), with the same columns.
UNION
MATCH (e:Entity {id: $entity_id})<-[r]-(related:Entity)
RETURN
type(r) AS relationship_type,
properties(r) AS relationship_properties,
related.id AS related_id,
related.name AS related_name,
labels(related) AS related_types
ORDER BY relationship_type
Inference Patterns
Type Inference
// Infer nationality from birthplace
// Follows 0 to 3 LOCATED_IN hops from the birthplace, so the birthplace itself may be the country.
// Only persons without a nationality are touched; the result goes to a separate
// inferred_nationality property rather than overwriting nationality.
MATCH (p:Person)-[:BORN_IN]->(loc:Location)-[:LOCATED_IN*0..3]->(country:Location {location_type: 'country'})
WHERE p.nationality IS NULL
SET p.inferred_nationality = country.name
RETURN p.name, p.inferred_nationality
Transitive Relationships
// Find all ancestors (transitive closure)
// Follows PARENT_OF edges backwards with no upper bound on the number of hops;
// the path length is reported as the number of generations.
MATCH (person:Person {id: $person_id})
MATCH path = (person)<-[:PARENT_OF*]-(ancestor:Person)
RETURN ancestor.name, length(path) AS generations
ORDER BY generations
// Find organizational hierarchy
// Same pattern along SUBSIDIARY_OF, from the organization up through its parents.
MATCH (org:Organization {id: $org_id})
MATCH path = (org)-[:SUBSIDIARY_OF*]->(parent:Organization)
RETURN parent.name, length(path) AS levels
ORDER BY levels
Relationship Inference
// Infer sibling relationships
// Two distinct persons sharing a parent, with no SIBLING_OF edge between them in either direction.
// The created edge is flagged inferred: true.
MATCH (p1:Person)<-[:PARENT_OF]-(parent:Person)-[:PARENT_OF]->(p2:Person)
WHERE p1 <> p2
AND NOT (p1)-[:SIBLING_OF]-(p2)
MERGE (p1)-[:SIBLING_OF {inferred: true}]-(p2)
RETURN p1.name, p2.name
// Infer colleague relationships
// Two distinct persons with a WORKS_FOR edge to the same organization;
// the organization's name is recorded on the inferred edge.
MATCH (p1:Person)-[:WORKS_FOR]->(org:Organization)<-[:WORKS_FOR]-(p2:Person)
WHERE p1 <> p2
AND NOT (p1)-[:COLLEAGUE_OF]-(p2)
MERGE (p1)-[:COLLEAGUE_OF {
inferred: true,
via_organization: org.name
}]-(p2)
Temporal Inference
// Infer if person was alive during event
// Pairs every person with every event, then keeps pairs where the person was born on or
// before the event's start date and had not died before it (no death_date counts as alive).
MATCH (person:Person), (event:Event)
WHERE person.birth_date <= event.start_date
AND (person.death_date IS NULL OR person.death_date >= event.start_date)
MERGE (person)-[:CONTEMPORARY_WITH {inferred: true}]->(event)
RETURN person.name, event.name
Question Answering Queries
Simple Factual Queries
// "Where was Marie Curie born?"
// Each question maps to one pattern: fix the known entity by name, return the other end.
MATCH (p:Person {name: "Marie Curie"})-[:BORN_IN]->(location:Location)
RETURN location.name AS answer
// "Who founded Microsoft?"
MATCH (p:Person)-[:FOUNDED]->(org:Organization {name: "Microsoft"})
RETURN p.name AS founder
// "When did World War II start?"
// Answered from a property of the event node itself; no relationship is traversed.
MATCH (e:Event {name: "World War II"})
RETURN e.start_date AS start_date
// Answer is the result of a question-answering query.
type Answer struct {
Value interface{} // the answer itself; AnswerQuestion scans the first result column into it
Confidence float64 // scanned from the matched person's confidence property
Source string // scanned from the matched person's source property
}
// AnswerQuestion answers a natural-language question from the knowledge graph.
//
// Only questions containing the word "born" are handled; anything else yields
// a "could not understand question" error. Errors from scanning the query
// result are returned unchanged.
func AnswerQuestion(ctx context.Context, db *sql.DB, question string) (*Answer, error) {
	// Pattern matching for question types
	// This would typically use NLP to parse the question
	const birthplaceQuery = `
MATCH (p:Person {name: ?})-[:BORN_IN]->(location:Location)
RETURN location.name AS answer, p.confidence AS confidence, p.source AS source
`

	// Example: "Where was X born?"
	if strings.Contains(question, "born") {
		// Extract entity name from question
		subject := extractEntityName(question)

		var result Answer
		scanErr := db.QueryRowContext(ctx, birthplaceQuery, subject).
			Scan(&result.Value, &result.Confidence, &result.Source)
		if scanErr != nil {
			return nil, scanErr
		}
		return &result, nil
	}

	// Add more question patterns...
	return nil, fmt.Errorf("could not understand question")
}
from dataclasses import dataclass
from typing import Any, Optional
import re
@dataclass
class Answer:
    """An answer from the knowledge graph together with its provenance."""

    value: Any
    confidence: float
    source: str
    explanation: Optional[str] = None


async def answer_question(client, question: str) -> Optional[Answer]:
    """Answer a natural language question using the knowledge graph.

    Three question shapes are recognised: "Where was X born?",
    "Who founded X?" and "What is the capital of X?". Only the first row
    of a result is used.

    Args:
        client: Geode client; must provide ``connection()`` as an async
            context manager yielding an object with ``query``.
        question: The question text.

    Returns:
        An :class:`Answer`, or ``None`` when no pattern matches or the
        graph has no matching data.
    """
    # Pattern: "Where was X born?"
    born_match = re.match(r"[Ww]here was (.+?) born\??", question)
    if born_match:
        entity_name = born_match.group(1)

        async with client.connection() as conn:
            result, _ = await conn.query("""
MATCH (p:Person {name: $name})-[:BORN_IN]->(location:Location)
RETURN location.name AS answer, p.confidence AS confidence, p.source AS source
""", {"name": entity_name})

            if result.rows:
                row = result.rows[0]
                return Answer(
                    value=row['answer'].as_string,
                    confidence=row['confidence'].as_float,
                    source=row['source'].as_string,
                    explanation=f"{entity_name} was born in {row['answer'].as_string}"
                )

    # Pattern: "Who founded X?"
    # The lazy group must be anchored at the end of the question; without the
    # trailing `$` it stops after a single character ("M" for "Microsoft").
    founded_match = re.match(r"[Ww]ho founded (.+?)\??\s*$", question)
    if founded_match:
        org_name = founded_match.group(1)

        async with client.connection() as conn:
            result, _ = await conn.query("""
MATCH (p:Person)-[:FOUNDED]->(org:Organization {name: $name})
RETURN p.name AS answer, p.confidence AS confidence, p.source AS source
""", {"name": org_name})

            if result.rows:
                row = result.rows[0]
                return Answer(
                    value=row['answer'].as_string,
                    confidence=row['confidence'].as_float,
                    source=row['source'].as_string,
                    explanation=f"{row['answer'].as_string} founded {org_name}"
                )

    # Pattern: "What is the capital of X?"
    # Anchored for the same reason as the "founded" pattern above.
    capital_match = re.match(r"[Ww]hat is the capital of (.+?)\??\s*$", question)
    if capital_match:
        country_name = capital_match.group(1)

        async with client.connection() as conn:
            result, _ = await conn.query("""
MATCH (capital:Location)-[:CAPITAL_OF]->(country:Location {name: $name})
RETURN capital.name AS answer
""", {"name": country_name})

            if result.rows:
                # This query returns no confidence/source columns, so fixed values are used.
                return Answer(
                    value=result.rows[0]['answer'].as_string,
                    confidence=1.0,
                    source="knowledge_graph",
                    explanation=f"The capital of {country_name} is {result.rows[0]['answer'].as_string}"
                )

    return None
async def main():
    """Ask a few sample questions and print every answer that was found."""
    client = Client(host="localhost", port=3141, skip_verify=True)

    questions = [
        "Where was Marie Curie born?",
        "Who founded Microsoft?",
        "What is the capital of France?"
    ]

    for q in questions:
        answer = await answer_question(client, q)
        # Questions without an answer are skipped silently.
        if answer:
            print(f"Q: {q}")
            print(f"A: {answer.value} (confidence: {answer.confidence})")
            print()
use regex::Regex;
/// Result of a question-answering query.
#[derive(Debug)]
struct Answer {
/// The answer text, taken from the `answer` column of the query result.
value: String,
/// Taken from the `confidence` column of the query result.
confidence: f64,
/// Taken from the `source` column of the query result.
source: String,
/// Optional human-readable sentence built from the answer.
explanation: Option<String>,
}
/// Answers a natural-language question from the knowledge graph.
///
/// Recognises "Where was X born?" and "Who founded X?". Returns `Ok(None)`
/// when no pattern matches or the graph has no matching row; only the first
/// row of a result is used. Query and conversion errors are propagated.
async fn answer_question(
    conn: &mut geode_client::Connection,
    question: &str,
) -> Result<Option<Answer>, Box<dyn std::error::Error>> {
    // Pattern: "Where was X born?"
    let born_re = Regex::new(r"[Ww]here was (.+?) born\??")?;
    if let Some(caps) = born_re.captures(question) {
        let entity_name = &caps[1];

        let mut params = HashMap::new();
        params.insert("name".to_string(), Value::string(entity_name));

        let (page, _) = conn.query_with_params(r#"
MATCH (p:Person {name: $name})-[:BORN_IN]->(location:Location)
RETURN location.name AS answer, p.confidence AS confidence, p.source AS source
"#, &params).await?;

        if let Some(row) = page.rows.first() {
            return Ok(Some(Answer {
                value: row.get("answer").unwrap().as_string()?,
                confidence: row.get("confidence").unwrap().as_float()?,
                source: row.get("source").unwrap().as_string()?,
                explanation: Some(format!("{} was born in {}",
                    entity_name, row.get("answer").unwrap().as_string()?)),
            }));
        }
    }

    // Pattern: "Who founded X?"
    // The lazy group must be anchored at the end of the question; without the
    // trailing `$` it stops after a single character ("M" for "Microsoft").
    let founded_re = Regex::new(r"[Ww]ho founded (.+?)\??\s*$")?;
    if let Some(caps) = founded_re.captures(question) {
        let org_name = &caps[1];

        let mut params = HashMap::new();
        params.insert("name".to_string(), Value::string(org_name));

        let (page, _) = conn.query_with_params(r#"
MATCH (p:Person)-[:FOUNDED]->(org:Organization {name: $name})
RETURN p.name AS answer, p.confidence AS confidence, p.source AS source
"#, &params).await?;

        if let Some(row) = page.rows.first() {
            return Ok(Some(Answer {
                value: row.get("answer").unwrap().as_string()?,
                confidence: row.get("confidence").unwrap().as_float()?,
                source: row.get("source").unwrap().as_string()?,
                explanation: Some(format!("{} founded {}",
                    row.get("answer").unwrap().as_string()?, org_name)),
            }));
        }
    }

    Ok(None)
}
/** Result of a question-answering query. */
interface Answer {
/** The answer text, taken from the `answer` column of the query result. */
value: string;
confidence: number;
source: string;
/** Optional human-readable sentence built from the answer. */
explanation?: string;
}
/**
 * Answers a natural-language question from the knowledge graph.
 *
 * Recognises "Where was X born?" and "Who founded X?". Only the first row
 * of a result is used.
 *
 * @param client - Connected Geode client.
 * @param question - The question text.
 * @returns The answer, or `null` when no pattern matches or no row is found.
 */
async function answerQuestion(client: Client, question: string): Promise<Answer | null> {
  // Pattern: "Where was X born?"
  const bornMatch = question.match(/[Ww]here was (.+?) born\??/);
  if (bornMatch) {
    const entityName = bornMatch[1];

    const rows = await client.queryAll(`
MATCH (p:Person {name: $name})-[:BORN_IN]->(location:Location)
RETURN location.name AS answer, p.confidence AS confidence, p.source AS source
`, { params: { name: entityName } });

    if (rows.length > 0) {
      const row = rows[0];
      return {
        value: row.get('answer')?.asString ?? '',
        confidence: row.get('confidence')?.asNumber ?? 0,
        source: row.get('source')?.asString ?? '',
        explanation: `${entityName} was born in ${row.get('answer')?.asString}`
      };
    }
  }

  // Pattern: "Who founded X?"
  // The lazy group must be anchored at the end of the question; without the
  // trailing `$` it stops after a single character ("M" for "Microsoft").
  const foundedMatch = question.match(/[Ww]ho founded (.+?)\??\s*$/);
  if (foundedMatch) {
    const orgName = foundedMatch[1];

    const rows = await client.queryAll(`
MATCH (p:Person)-[:FOUNDED]->(org:Organization {name: $name})
RETURN p.name AS answer, p.confidence AS confidence, p.source AS source
`, { params: { name: orgName } });

    if (rows.length > 0) {
      const row = rows[0];
      return {
        value: row.get('answer')?.asString ?? '',
        confidence: row.get('confidence')?.asNumber ?? 0,
        source: row.get('source')?.asString ?? '',
        explanation: `${row.get('answer')?.asString} founded ${orgName}`
      };
    }
  }

  return null;
}
/// Result of a question-answering query.
const Answer = struct {
value: []const u8,
confidence: f64,
source: []const u8,
// Optional human-readable sentence; null when absent.
explanation: ?[]const u8,
};
/// Answers a natural-language question from the knowledge graph.
///
/// Only questions containing the substring "born" trigger a query.
/// NOTE(review): result parsing is elided in this snippet, so as written the
/// function always ends at `return null`.
pub fn answerQuestion(
client: *geode.GeodeClient,
allocator: std.mem.Allocator,
question: []const u8,
) !?Answer {
// Pattern matching for "Where was X born?"
if (std.mem.indexOf(u8, question, "born")) |_| {
// Extract entity name (simplified)
// NOTE(review): extractEntityName is not defined in this snippet — confirm where it comes from.
const entity_name = extractEntityName(question);
var params = std.json.ObjectMap.init(allocator);
defer params.deinit();
try params.put("name", .{ .string = entity_name });
// Send the query, then wait up to 30000 (presumably milliseconds — confirm) for the response.
try client.sendRunGql(1,
\\MATCH (p:Person {name: $name})-[:BORN_IN]->(location:Location)
\\RETURN location.name AS answer, p.confidence AS confidence, p.source AS source
, .{ .object = params });
_ = try client.receiveMessage(30000);
// Request result rows and receive them.
try client.sendPull(1, 1);
const result = try client.receiveMessage(30000);
defer allocator.free(result);
// Parse result and return Answer
// ...
}
return null;
}
Complex Queries
// "What scientists won Nobel Prizes in Physics?"
// NOTE(review): :Award and :RECEIVED are not part of the ontology shown earlier — confirm they are defined elsewhere.
MATCH (p:Person)-[:RECEIVED]->(award:Award {name: "Nobel Prize in Physics"})
WHERE "scientist" IN p.occupation OR "physicist" IN p.occupation
RETURN p.name AS scientist, award.year AS year
ORDER BY award.year
// "Which companies were founded by Stanford graduates?"
// Two patterns on the same founder: educated at Stanford, and founder of some organization.
MATCH (founder:Person)-[:EDUCATED_AT]->(:Organization {name: "Stanford University"})
MATCH (founder)-[:FOUNDED]->(company:Organization)
RETURN founder.name AS founder, company.name AS company, company.founded_date
// "Find all connections between Einstein and Curie"
// Shortest path of at most 5 hops, any relationship type, either direction.
MATCH path = shortestPath(
(einstein:Person {name: "Albert Einstein"})-[*..5]-(curie:Person {name: "Marie Curie"})
)
RETURN path
// "What events did Marie Curie participate in?"
MATCH (marie:Person {name: "Marie Curie"})-[r:PARTICIPATED_IN]->(event:Event)
RETURN event.name, r.role, event.start_date
ORDER BY event.start_date
Path-Based Queries
// Find how two entities are connected
// Shortest path of at most 6 hops; returns the node names and relationship types along it.
MATCH (e1:Entity {id: $entity1_id}), (e2:Entity {id: $entity2_id})
MATCH path = shortestPath((e1)-[*..6]-(e2))
RETURN [node IN nodes(path) | node.name] AS entity_names,
[rel IN relationships(path) | type(rel)] AS relationship_types,
length(path) AS path_length
// Find all paths between entities
// Paths of at most 4 hops that pass only through :Entity nodes; capped at 10 results.
MATCH (e1:Entity {id: $entity1_id}), (e2:Entity {id: $entity2_id})
MATCH path = (e1)-[*..4]-(e2)
WHERE ALL(n IN nodes(path) WHERE n:Entity)
RETURN path
LIMIT 10
NLP Integration
Entity Extraction Pipeline
// Store extracted entities from NLP
// A :Mention records where in a document a surface form was found (start/end offsets).
CREATE (mention:Mention {
id: $mention_id,
text: $surface_form,
document_id: $document_id,
start_offset: $start_offset,
end_offset: $end_offset,
confidence: $confidence
})
// Link to resolved entity
// Separate statement: the REFERS_TO edge carries the linking confidence and method.
MATCH (mention:Mention {id: $mention_id})
MATCH (entity:Entity {id: $entity_id})
CREATE (mention)-[:REFERS_TO {
confidence: $linking_confidence,
method: $linking_method
}]->(entity)
// EntityMention is one NLP-extracted mention of an entity inside a document.
type EntityMention struct {
ID string
Text string // the mention text as found in the document
DocumentID string
StartOffset int
EndOffset int
Confidence float64 // stored on the :Mention node
EntityID string // Resolved entity; empty means unresolved, and no REFERS_TO edge is created
LinkingConf float64 // stored as the confidence of the REFERS_TO edge
}
// StoreEntityMentions writes all mentions, and their links to resolved
// entities, inside a single transaction.
//
// Each mention becomes a :Mention node. When EntityID is non-empty, a
// REFERS_TO edge to that entity is added. The first failing statement aborts
// the whole batch and its error is returned.
func StoreEntityMentions(ctx context.Context, db *sql.DB, mentions []EntityMention) error {
	const createMention = `
CREATE (mention:Mention {
id: ?,
text: ?,
document_id: ?,
start_offset: ?,
end_offset: ?,
confidence: ?
})
`
	const linkMention = `
MATCH (mention:Mention {id: ?})
MATCH (entity:Entity {id: ?})
CREATE (mention)-[:REFERS_TO {
confidence: ?,
method: 'nlp_extraction'
}]->(entity)
`

	tx, err := db.BeginTx(ctx, nil)
	if err != nil {
		return err
	}
	// Rolls back on every early return below.
	defer tx.Rollback()

	for i := range mentions {
		item := &mentions[i]

		// Create mention node
		if _, err = tx.ExecContext(ctx, createMention,
			item.ID, item.Text, item.DocumentID, item.StartOffset, item.EndOffset, item.Confidence); err != nil {
			return err
		}

		// Link to entity if resolved
		if item.EntityID == "" {
			continue
		}
		if _, err = tx.ExecContext(ctx, linkMention, item.ID, item.EntityID, item.LinkingConf); err != nil {
			return err
		}
	}

	return tx.Commit()
}
from dataclasses import dataclass
from typing import List, Optional
@dataclass
class EntityMention:
    """One NLP-extracted mention of an entity inside a document."""

    id: str
    text: str
    document_id: str
    start_offset: int
    end_offset: int
    confidence: float
    # None (or empty) means the mention was not resolved to an entity.
    entity_id: Optional[str] = None
    linking_confidence: float = 0.0


async def store_entity_mentions(client, mentions: List[EntityMention]) -> None:
    """Store NLP-extracted entity mentions in the knowledge graph.

    All mentions are written in one transaction. Each mention becomes a
    ``:Mention`` node; mentions with an ``entity_id`` additionally get a
    ``REFERS_TO`` edge to that entity.

    Args:
        client: Geode client; must provide ``connection()`` as an async
            context manager yielding an object with ``begin``, ``execute``,
            ``commit`` and ``rollback``.
        mentions: The mentions to store.

    Raises:
        Exception: Any error from the database is re-raised after the
            transaction has been rolled back.
    """
    async with client.connection() as conn:
        await conn.begin()
        try:
            for mention in mentions:
                # Create mention node
                await conn.execute("""
CREATE (mention:Mention {
id: $id,
text: $text,
document_id: $document_id,
start_offset: $start_offset,
end_offset: $end_offset,
confidence: $confidence
})
""", {
                    "id": mention.id,
                    "text": mention.text,
                    "document_id": mention.document_id,
                    "start_offset": mention.start_offset,
                    "end_offset": mention.end_offset,
                    "confidence": mention.confidence
                })

                # Link to resolved entity
                if mention.entity_id:
                    await conn.execute("""
MATCH (mention:Mention {id: $mention_id})
MATCH (entity:Entity {id: $entity_id})
CREATE (mention)-[:REFERS_TO {
confidence: $linking_confidence,
method: 'nlp_extraction'
}]->(entity)
""", {
                        "mention_id": mention.id,
                        "entity_id": mention.entity_id,
                        "linking_confidence": mention.linking_confidence
                    })

            await conn.commit()
        except Exception:
            await conn.rollback()
            # Bare raise re-raises the active exception with its original traceback.
            raise
# Example: Integration with spaCy
import spacy
async def extract_and_store_entities(client, document_id: str, text: str):
    """Extract entities from text using spaCy and store in knowledge graph.

    Every named entity spaCy finds becomes an ``EntityMention``. Each one is
    looked up in the graph, and when a match exists the mention is linked to it.

    Args:
        client: Geode client passed through to the lookup and store helpers.
        document_id: Identifier recorded on every mention.
        text: The document text to analyse.

    Returns:
        The list of mentions that was stored.
    """
    # NOTE: the model is loaded on every call.
    nlp = spacy.load("en_core_web_lg")
    doc = nlp(text)

    mentions = []
    for ent in doc.ents:
        mention_id = str(uuid4())

        # Try to link to existing entity
        entity_id = await find_matching_entity(client, ent.text, ent.label_)

        mentions.append(EntityMention(
            id=mention_id,
            text=ent.text,
            document_id=document_id,
            start_offset=ent.start_char,
            end_offset=ent.end_char,
            confidence=0.9,  # fixed placeholder; no per-entity score is read from spaCy here
            entity_id=entity_id,
            linking_confidence=0.8 if entity_id else 0.0
        ))

    await store_entity_mentions(client, mentions)
    return mentions
async def find_matching_entity(client, text: str, entity_type: str) -> Optional[str]:
    """Find matching entity in knowledge graph.

    Args:
        client: Geode client; must provide ``connection()`` as an async
            context manager yielding an object with ``query``.
        text: Mention text; matched against an entity's name or aliases.
        entity_type: NER label ("PERSON", "ORG", "GPE" or "LOC").

    Returns:
        The id of the first matching entity, or ``None`` when the label is
        not mapped or nothing matches.
    """
    label_mapping = {
        "PERSON": "Person",
        "ORG": "Organization",
        "GPE": "Location",
        "LOC": "Location"
    }

    kg_label = label_mapping.get(entity_type)
    if not kg_label:
        return None

    async with client.connection() as conn:
        # The label is interpolated into the query text, but it can only be one
        # of the fixed values in label_mapping; the mention text is a parameter.
        result, _ = await conn.query(f"""
MATCH (e:{kg_label})
WHERE e.name = $name OR $name IN e.aliases
RETURN e.id AS id
LIMIT 1
""", {"name": text})

        if result.rows:
            return result.rows[0]['id'].as_string

    return None
/// One NLP-extracted mention of an entity inside a document.
#[derive(Debug)]
struct EntityMention {
id: String,
/// The mention text as found in the document.
text: String,
document_id: String,
start_offset: i64,
end_offset: i64,
confidence: f64,
/// Resolved entity id; `None` means no `REFERS_TO` edge is created.
entity_id: Option<String>,
/// Stored as the confidence of the `REFERS_TO` edge.
linking_confidence: f64,
}
/// Stores all `mentions`, and their links to resolved entities, in one transaction.
///
/// Each mention becomes a `:Mention` node. When `entity_id` is set, a
/// `REFERS_TO` edge to that entity is added. The first failing statement
/// returns its error immediately.
// NOTE(review): on an early `?` return the transaction is neither committed nor
// explicitly rolled back here — confirm how the client handles that.
async fn store_entity_mentions(
    conn: &mut geode_client::Connection,
    mentions: &[EntityMention],
) -> Result<(), Box<dyn std::error::Error>> {
    conn.begin().await?;

    for mention in mentions {
        // Create mention node
        let mut params = HashMap::new();
        params.insert("id".to_string(), Value::string(&mention.id));
        params.insert("text".to_string(), Value::string(&mention.text));
        params.insert("document_id".to_string(), Value::string(&mention.document_id));
        params.insert("start_offset".to_string(), Value::int(mention.start_offset));
        params.insert("end_offset".to_string(), Value::int(mention.end_offset));
        params.insert("confidence".to_string(), Value::float(mention.confidence));

        conn.query_with_params(r#"
CREATE (mention:Mention {
id: $id,
text: $text,
document_id: $document_id,
start_offset: $start_offset,
end_offset: $end_offset,
confidence: $confidence
})
"#, &params).await?;

        // Link to entity if resolved
        if let Some(ref entity_id) = mention.entity_id {
            let mut link_params = HashMap::new();
            link_params.insert("mention_id".to_string(), Value::string(&mention.id));
            link_params.insert("entity_id".to_string(), Value::string(entity_id));
            link_params.insert("linking_confidence".to_string(), Value::float(mention.linking_confidence));

            conn.query_with_params(r#"
MATCH (mention:Mention {id: $mention_id})
MATCH (entity:Entity {id: $entity_id})
CREATE (mention)-[:REFERS_TO {
confidence: $linking_confidence,
method: 'nlp_extraction'
}]->(entity)
"#, &link_params).await?;
        }
    }

    conn.commit().await?;
    Ok(())
}
/** One NLP-extracted mention of an entity inside a document. */
interface EntityMention {
id: string;
/** The mention text as found in the document. */
text: string;
documentId: string;
startOffset: number;
endOffset: number;
confidence: number;
/** Resolved entity id; when absent no REFERS_TO edge is created. */
entityId?: string;
/** Stored as the confidence of the REFERS_TO edge; treated as 0 when absent. */
linkingConfidence?: number;
}
/**
 * Stores all mentions, and their links to resolved entities, in one transaction.
 *
 * Each mention becomes a `:Mention` node. When `entityId` is set, a
 * `REFERS_TO` edge to that entity is added.
 *
 * @param client - Connected Geode client.
 * @param mentions - The mentions to store.
 */
async function storeEntityMentions(client: Client, mentions: EntityMention[]): Promise<void> {
  const createMentionGql = `
CREATE (mention:Mention {
id: $id,
text: $text,
document_id: $document_id,
start_offset: $start_offset,
end_offset: $end_offset,
confidence: $confidence
})
`;
  const linkMentionGql = `
MATCH (mention:Mention {id: $mention_id})
MATCH (entity:Entity {id: $entity_id})
CREATE (mention)-[:REFERS_TO {
confidence: $linking_confidence,
method: 'nlp_extraction'
}]->(entity)
`;

  await client.withTransaction(async (tx) => {
    for (const item of mentions) {
      // Create mention node
      const nodeParams = {
        id: item.id,
        text: item.text,
        document_id: item.documentId,
        start_offset: item.startOffset,
        end_offset: item.endOffset,
        confidence: item.confidence
      };
      await tx.exec(createMentionGql, { params: nodeParams });

      // Link to entity if resolved
      if (!item.entityId) {
        continue;
      }
      const linkParams = {
        mention_id: item.id,
        entity_id: item.entityId,
        linking_confidence: item.linkingConfidence || 0
      };
      await tx.exec(linkMentionGql, { params: linkParams });
    }
  });
}
/// One NLP-extracted mention of an entity inside a document.
const EntityMention = struct {
id: []const u8,
// The mention text as found in the document.
text: []const u8,
document_id: []const u8,
start_offset: i64,
end_offset: i64,
confidence: f64,
// Resolved entity id; null means no REFERS_TO edge is created.
entity_id: ?[]const u8,
// Stored as the confidence of the REFERS_TO edge.
linking_confidence: f64,
};
/// Stores all `mentions`, and their links to resolved entities, between a
/// begin and a commit message.
///
/// Each mention becomes a `:Mention` node. When `entity_id` is set, a
/// `REFERS_TO` edge to that entity is added. Any send/receive error is
/// returned immediately.
pub fn storeEntityMentions(
    client: *geode.GeodeClient,
    allocator: std.mem.Allocator,
    mentions: []const EntityMention,
) !void {
    const create_mention_gql =
        \\CREATE (mention:Mention {
        \\ id: $id,
        \\ text: $text,
        \\ document_id: $document_id,
        \\ start_offset: $start_offset,
        \\ end_offset: $end_offset,
        \\ confidence: $confidence
        \\})
    ;
    const link_mention_gql =
        \\MATCH (mention:Mention {id: $mention_id})
        \\MATCH (entity:Entity {id: $entity_id})
        \\CREATE (mention)-[:REFERS_TO {
        \\ confidence: $linking_confidence,
        \\ method: 'nlp_extraction'
        \\}]->(entity)
    ;

    try client.sendBegin();
    _ = try client.receiveMessage(30000);

    for (mentions) |item| {
        // Parameters for the node insert; released at the end of each iteration.
        var node_params = std.json.ObjectMap.init(allocator);
        defer node_params.deinit();

        try node_params.put("id", .{ .string = item.id });
        try node_params.put("text", .{ .string = item.text });
        try node_params.put("document_id", .{ .string = item.document_id });
        try node_params.put("start_offset", .{ .integer = item.start_offset });
        try node_params.put("end_offset", .{ .integer = item.end_offset });
        try node_params.put("confidence", .{ .float = item.confidence });

        try client.sendRunGql(1, create_mention_gql, .{ .object = node_params });
        _ = try client.receiveMessage(30000);

        // Link to the entity only when the mention was resolved.
        if (item.entity_id) |resolved_id| {
            var edge_params = std.json.ObjectMap.init(allocator);
            defer edge_params.deinit();

            try edge_params.put("mention_id", .{ .string = item.id });
            try edge_params.put("entity_id", .{ .string = resolved_id });
            try edge_params.put("linking_confidence", .{ .float = item.linking_confidence });

            try client.sendRunGql(2, link_mention_gql, .{ .object = edge_params });
            _ = try client.receiveMessage(30000);
        }
    }

    try client.sendCommit();
    _ = try client.receiveMessage(30000);
}
Relation Extraction
// Persist one extracted relation as its own node, keeping the ids of both
// mentions, the relation type, its confidence and the source sentence
CREATE (relation:ExtractedRelation {
  id: $relation_id,
  subject_mention_id: $subject_mention_id,
  object_mention_id: $object_mention_id,
  relation_type: $relation_type,
  confidence: $confidence,
  document_id: $document_id,
  sentence: $sentence
})
// Attach the relation node to its subject and object mentions
MATCH (relation:ExtractedRelation {id: $relation_id})
MATCH (subj:Mention {id: $subject_mention_id})
MATCH (obj:Mention {id: $object_mention_id})
CREATE (relation)-[:HAS_SUBJECT]->(subj),
       (relation)-[:HAS_OBJECT]->(obj)
Knowledge Graph Population
Import from Structured Data
// Load people.csv (header row required) and create one Person entity per line
LOAD CSV WITH HEADERS FROM 'file:///people.csv' AS line
CREATE (person:Person:Entity {
  id: line.id,
  name: line.name,
  description: line.description,
  birth_date: date(line.birth_date),
  nationality: line.nationality,
  source: 'csv_import',
  created_at: timestamp()
})
// Load works_for.csv and connect already-imported people to organizations;
// lines whose person or organization id does not match are skipped by MATCH
LOAD CSV WITH HEADERS FROM 'file:///works_for.csv' AS line
MATCH (employee:Person {id: line.person_id})
MATCH (employer:Organization {id: line.org_id})
CREATE (employee)-[:WORKS_FOR {
  start_date: date(line.start_date),
  // an absent end date is kept as null
  end_date: CASE WHEN line.end_date IS NULL THEN null ELSE date(line.end_date) END,
  role: line.role
}]->(employer)
Import from Wikidata
// Create or refresh entities from Wikidata SPARQL results.
// $wikidata_results: list of maps with id, label, description, aliases, instance_of
UNWIND $wikidata_results AS item
MERGE (e:Entity {wikidata_id: item.id})
ON CREATE SET
  e.id = randomUUID(),
  e.name = item.label,
  e.description = item.description,
  e.aliases = item.aliases,
  e.source = 'wikidata',
  e.confidence = 1.0,
  e.created_at = timestamp()
ON MATCH SET
  e.name = item.label,
  e.description = item.description,
  e.updated_at = timestamp()
// Add appropriate label based on Wikidata type
WITH e, item
CALL {
  WITH e, item
  WHERE item.instance_of = 'Q5' // human
  SET e:Person
  RETURN e
  UNION
  WITH e, item
  WHERE item.instance_of IN ['Q4830453', 'Q783794'] // business or company
  SET e:Organization
  RETURN e
  UNION
  WITH e, item
  WHERE item.instance_of IN ['Q515', 'Q6256'] // city or country
  SET e:Location
  RETURN e
  UNION
  // Fallback for any other (or missing) type: no extra label is added, but the
  // row must survive the subquery, otherwise the entity merged above would be
  // left out of imported_count.
  WITH e, item
  WHERE item.instance_of IS NULL
     OR NOT item.instance_of IN ['Q5', 'Q4830453', 'Q783794', 'Q515', 'Q6256']
  RETURN e
}
RETURN count(e) AS imported_count
Merge Duplicate Entities
// Find potential duplicates: pairs of entities with the same name and the same
// labels() value. id(e1) < id(e2) keeps each unordered pair only once.
// NOTE(review): labels(e1) = labels(e2) compares lists — confirm label order is
// stable in Geode, otherwise equal label sets may fail to match.
MATCH (e1:Entity), (e2:Entity)
WHERE e1.name = e2.name
AND id(e1) < id(e2)
AND labels(e1) = labels(e2)
// Choose which node survives: a Wikidata-sourced node wins, then the one with
// the strictly higher confidence; in every other case e2 is kept.
WITH e1, e2,
CASE
WHEN e1.source = 'wikidata' THEN e1
WHEN e2.source = 'wikidata' THEN e2
WHEN e1.confidence > e2.confidence THEN e1
ELSE e2
END AS keeper,
CASE
WHEN e1.source = 'wikidata' THEN e2
WHEN e2.source = 'wikidata' THEN e1
WHEN e1.confidence > e2.confidence THEN e2
ELSE e1
END AS duplicate
// Merge relationships from duplicate to keeper
// NOTE(review): the match below is undirected, but the new relationship is
// always created keeper -> other, so incoming edges would be reversed — confirm.
// NOTE(review): presumably a duplicate with no relationships produces no rows
// here and therefore never reaches the alias update or the delete — verify.
MATCH (duplicate)-[r]-(other)
WHERE NOT other = keeper
WITH keeper, duplicate, r, other
// NOTE(review): type(r) is used as the relationship type inside the pattern —
// confirm Geode accepts a dynamic type expression in this position.
CREATE (keeper)-[newRel:type(r)]->(other)
SET newRel = properties(r)
// Add duplicate's aliases to keeper
// NOTE(review): this runs once per matched relationship row, so the same
// aliases look like they are appended repeatedly; a null aliases list on either
// side would presumably make the whole concatenation null — verify.
SET keeper.aliases = keeper.aliases + duplicate.aliases + [duplicate.name]
// Delete duplicate
DETACH DELETE duplicate
Analytics and Quality
Knowledge Graph Statistics
// Count entities by type: one row per label other than the shared 'Entity'
// base label, most common first. An entity with several labels is counted
// once under each of them.
MATCH (e:Entity)
UNWIND labels(e) AS label
// Filter through WITH so the predicate is attached to a clause that accepts
// WHERE (openCypher-style grammars do not allow WHERE directly after UNWIND)
WITH label
WHERE label <> 'Entity'
RETURN label AS entity_type, count(*) AS count
ORDER BY count DESC
// Relationship totals grouped by type, most frequent first
MATCH ()-[rel]->()
WITH type(rel) AS relationship_type
RETURN relationship_type, count(*) AS count
ORDER BY count DESC
// Calculate graph density for a directed graph:
//   density = edges / (nodes * (nodes - 1))
MATCH (n)
WITH count(n) AS node_count
// OPTIONAL MATCH keeps the single result row when the graph has no
// relationships, so the query reports edge_count = 0 instead of returning nothing
OPTIONAL MATCH ()-[r]->()
WITH node_count, count(r) AS edge_count
RETURN
  node_count,
  edge_count,
  // density is undefined for fewer than two nodes; return null rather than
  // dividing by zero
  CASE
    WHEN node_count < 2 THEN null
    ELSE toFloat(edge_count) / (node_count * (node_count - 1))
  END AS density
Data Quality Checks
// Find entities without relationships: no edge of any type in either
// direction. Output is capped at the first 100 matches.
MATCH (e:Entity)
WHERE NOT (e)-[]-()
RETURN e.id, e.name, labels(e) AS types
LIMIT 100
// Find low confidence entities: confidence strictly below 0.5, ordered by
// confidence, with the source included for triage.
// NOTE(review): entities with no confidence value presumably do not satisfy
// the comparison and are not listed — verify if they should be.
MATCH (e:Entity)
WHERE e.confidence < 0.5
RETURN e.id, e.name, e.confidence, e.source
ORDER BY e.confidence
// Find entities with missing required properties: Person nodes whose name is
// NULL or the empty string (name is the only property checked here).
MATCH (p:Person)
WHERE p.name IS NULL OR p.name = ''
RETURN p.id, p.source
// Find orphan mentions (not linked to entities): Mention nodes with no
// outgoing REFERS_TO relationship. Output is capped at the first 100 matches.
MATCH (m:Mention)
WHERE NOT (m)-[:REFERS_TO]->()
RETURN m.id, m.text, m.document_id
LIMIT 100
Next Steps
- Fraud Detection Guide - Pattern detection in knowledge graphs
- Recommendation Engine Guide - Entity recommendations
- Query Performance Guide - Optimize knowledge graph queries
- Data Import Guide - Import knowledge from external sources
Resources
Questions? Join our community forum to discuss knowledge graph implementations.