Comprehensive guide to Geode’s implementation of ISO/IEC 39075:2024 GQL standard, covering architecture, design decisions, optimization strategies, and development methodology for standards-compliant graph database systems.
Implementation Architecture
Query Processing Pipeline
┌─────────────┐
│ GQL Query │
└──────┬──────┘
│
v
┌─────────────┐
│ Lexer │ Tokenize input
└──────┬──────┘
│
v
┌─────────────┐
│ Parser │ Build AST (Abstract Syntax Tree)
└──────┬──────┘
│
v
┌─────────────┐
│ Semantic │ Type checking, name resolution
│ Analyzer │
└──────┬──────┘
│
v
┌─────────────┐
│ Optimizer │ Query optimization, rewriting
└──────┬──────┘
│
v
┌─────────────┐
│ Executor │ Execute plan
└──────┬──────┘
│
v
┌─────────────┐
│ Results │
└─────────────┘
Core Components
Lexer (gql/src/lexer.zig):
- Tokenizes GQL input into lexical units
- Handles keywords, identifiers, literals, operators
- Implements ISO §5 (Lexical elements)
Parser (gql/src/parser.zig):
- Constructs Abstract Syntax Tree (AST)
- Validates grammar per ISO §7-9
- Error recovery and reporting
AST (gql/src/ast.zig):
- Represents parsed query structure
- Node types for all GQL constructs
- Visitor pattern for traversal
Type System (geode/src/query/types.zig):
- Implements ISO §6 data types
- Type inference and checking
- Type coercion rules
Optimizer (geode/src/query/optimizer.zig):
- Cost-based query optimization
- Pattern rewriting
- Index selection
Executor (geode/src/query/executor.zig):
- Interprets execution plan
- Accesses storage engine
- Produces result streams
Standards Implementation
Lexical Elements (ISO §5)
Keywords:
// geode/src/query/lexer.zig — NOTE(review): the component list above says
// gql/src/lexer.zig; confirm which path is current.
// Keyword lookup table for the ISO §5 reserved words.
// `std.StringHashMap(T).init` takes an allocator, not an entry list, so a
// runtime hash map cannot be built this way; the idiomatic structure for a
// fixed keyword set is a comptime static string map.
const keywords = std.StaticStringMap(TokenType).initComptime(.{
    .{ "MATCH", .MATCH },
    .{ "CREATE", .CREATE },
    .{ "WHERE", .WHERE },
    .{ "RETURN", .RETURN },
    // ... all ISO-defined keywords
});
Identifiers:
// Supports both simple and quoted identifiers
/// Scans an identifier token, dispatching on the leading character:
/// a backtick introduces a quoted (delimited) identifier, anything else
/// is scanned as a simple identifier.
fn scanIdentifier(self: *Lexer) Token {
    return if (self.current == '`')
        self.scanQuotedIdentifier()
    else
        self.scanSimpleIdentifier();
}
Literals:
// Integer, float, string, boolean, null
/// Scans a literal token (integer, float, string, boolean, null).
/// A digit starts a number, either quote character starts a string,
/// and everything else falls through to keyword/identifier scanning.
fn scanLiteral(self: *Lexer) Token {
    return switch (self.current) {
        '0'...'9' => self.scanNumber(),
        '\'', '"' => self.scanString(),
        else => self.scanKeywordOrIdentifier(),
    };
}
Data Types (ISO §6)
Scalar Types:
// geode/src/types.zig — NOTE(review): the component list above says
// geode/src/query/types.zig; confirm which path is current.
// Runtime value representation: a tagged union covering the ISO §6 scalar,
// temporal, constructed, and graph-reference types.
const ValueType = union(enum) {
    // Absence of a value (GQL NULL) — no payload.
    null_type,
    boolean: bool,
    integer: i64,
    float: f64,
    // Exact-precision numeric; `Decimal` is a project type (not shown here).
    decimal: Decimal,
    string: []const u8,
    bytes: []const u8,
    // Temporal types per ISO §6; project-defined structs.
    date: Date,
    time: Time,
    datetime: DateTime,
    duration: Duration,
    // Constructed types: ordered list and string-keyed map.
    list: []Value,
    map: std.StringHashMap(Value),
    // Graph references — presumably handles into the storage engine rather
    // than owned copies; confirm ownership/lifetime against storage code.
    node: NodeRef,
    relationship: RelRef,
    path: Path,
};
Type Inference:
/// Infers the static type of an expression per the ISO §6 typing rules,
/// dispatching on the AST node kind. Property and function typing consult
/// the surrounding `TypeContext` for name resolution; literals are
/// self-describing. Remaining expression kinds are elided in this excerpt.
fn inferType(expr: *Expr, context: *TypeContext) !ValueType {
    return switch (expr.*) {
        .literal => inferLiteralType(expr.literal),
        .property => inferPropertyType(expr.property, context),
        .function => inferFunctionType(expr.function, context),
        .binary_op => inferBinaryOpType(expr.binary_op, context),
        // ... all expression types
    };
}
Pattern Matching (ISO §7)
Pattern Compilation:
// Convert pattern to execution plan
/// Lowers a parsed MATCH pattern into an `ExecutionPlan` by appending one
/// plan operator per pattern element. Uses switch payload captures so the
/// active union field is bound directly instead of being re-read via
/// field access on the union (the idiomatic — and safety-checked — form).
fn compilePattern(pattern: *Pattern) !ExecutionPlan {
    var plan = ExecutionPlan.init();
    for (pattern.elements) |element| {
        switch (element) {
            .node => |node| try plan.addNodeScan(node),
            .relationship => |rel| try plan.addRelTraversal(rel),
            .path => |path| try plan.addPathMatch(path),
        }
    }
    return plan;
}
Variable-Length Path:
// Implements ISO §7.4 variable-length paths
/// Breadth-first search from `start` toward `end` over relationships of
/// `rel_type`, collecting every path whose hop count lies in
/// [min_hops, max_hops]. Termination relies solely on the `max_hops`
/// bound — there is no visited set, so cyclic graphs enumerate every
/// bounded walk (which may be intended for path semantics, but is
/// exponential in the worst case).
/// NOTE(review): `allocator` is not a parameter here — presumably
/// file-scope; confirm. `queue.push` and `new_path.append` discard
/// possible errors; confirm those APIs are infallible. Ownership of
/// cloned paths that never reach `paths` is unclear — possible leak.
fn matchVariablePath(
    start: NodeId,
    end: NodeId,
    rel_type: []const u8,
    min_hops: usize,
    max_hops: usize,
) ![]Path {
    var paths = std.ArrayList(Path).init(allocator);
    // Breadth-first search with hop limits
    var queue = Queue.init();
    queue.push(.{ .node = start, .hops = 0, .path = Path.init() });
    while (queue.pop()) |state| {
        // Record a match; do NOT stop — longer paths to `end` may also
        // satisfy the hop range.
        if (state.node == end and state.hops >= min_hops) {
            try paths.append(state.path);
        }
        // Expand only while under the hop ceiling.
        if (state.hops < max_hops) {
            for (getOutgoingRels(state.node, rel_type)) |rel| {
                // Each branch gets its own copy of the path so far.
                var new_path = state.path.clone();
                new_path.append(rel);
                queue.push(.{
                    .node = rel.end_node,
                    .hops = state.hops + 1,
                    .path = new_path,
                });
            }
        }
    }
    // Caller owns the returned slice.
    return paths.toOwnedSlice();
}
Query Optimization
Rule-Based Optimization:
// Apply optimization rules
/// A named rewrite rule. `apply` mutates the plan in place and reports
/// whether it changed anything, so the driver knows when a fixed point
/// is reached.
const OptimizationRule = struct {
    name: []const u8,
    apply: *const fn (*QueryPlan) !bool,
};

// Rules run in declaration order; each is re-applied until it reports no
// further change (see `optimize` below).
const optimization_rules = [_]OptimizationRule{
    .{ .name = "push_down_filters", .apply = pushDownFilters },
    .{ .name = "merge_scans", .apply = mergeScans },
    .{ .name = "index_selection", .apply = selectIndexes },
    .{ .name = "join_reordering", .apply = reorderJoins },
};
/// Runs every registered optimization rule to a fixed point: each rule is
/// re-applied until it reports that it made no further change, then the
/// next rule runs. An error from any rule aborts the whole pass.
fn optimize(plan: *QueryPlan) !void {
    for (optimization_rules) |rule| {
        // `apply` returns true while it is still rewriting the plan.
        while (try rule.apply(plan)) {}
    }
}
Cost-Based Optimization:
// Estimate execution cost
/// Sums the estimated cost of every operator in the plan, using the
/// collected statistics for per-operator estimates. Payload captures bind
/// each operator variant directly rather than re-reading the union field.
fn estimateCost(plan: *ExecutionPlan, stats: *Statistics) f64 {
    var total: f64 = 0.0;
    for (plan.operators) |op| {
        const op_cost = switch (op) {
            .scan => |scan| estimateScanCost(scan, stats),
            .filter => |filter| estimateFilterCost(filter, stats),
            .join => |join| estimateJoinCost(join, stats),
            .aggregate => |agg| estimateAggregateCost(agg, stats),
        };
        total += op_cost;
    }
    return total;
}
Transaction Management (ISO §13)
MVCC Implementation:
// Multi-Version Concurrency Control
/// A single MVCC transaction. Reads come from the snapshot as of
/// `start_ts`; writes are buffered in `write_set` and applied atomically
/// at commit.
const Transaction = struct {
    id: TxnId,
    start_ts: Timestamp,
    isolation: IsolationLevel,
    /// Handle to the versioned store. `read` and `commit` dereference
    /// `self.storage`, so it must be a declared field — the original
    /// snippet used it without declaring it.
    storage: *Storage,
    /// Node versions observed by this transaction; validated at commit.
    read_set: std.AutoHashMap(NodeId, Version),
    /// Buffered writes, invisible to other transactions until commit.
    write_set: std.AutoHashMap(NodeId, Value),

    /// Reads a node with read-your-own-writes semantics: the write set
    /// shadows the snapshot, and every snapshot read is recorded in the
    /// read set for SSI validation.
    pub fn read(self: *Transaction, node_id: NodeId) !Value {
        // Check write set first
        if (self.write_set.get(node_id)) |value| {
            return value;
        }
        // Read from snapshot
        const version = try self.storage.getVersion(node_id, self.start_ts);
        try self.read_set.put(node_id, version);
        return version.value;
    }

    /// Buffers a write; nothing reaches storage until `commit`.
    pub fn write(self: *Transaction, node_id: NodeId, value: Value) !void {
        try self.write_set.put(node_id, value);
    }

    /// Validates the read set (serializable snapshot isolation), then
    /// applies the buffered writes atomically. Fails with
    /// `error.SerializationFailure` if a read was invalidated.
    /// NOTE(review): `validateReadSet` is shown elsewhere as a free
    /// function taking `*Transaction` — for `self.validateReadSet()` to
    /// resolve it must be declared inside this struct; confirm.
    pub fn commit(self: *Transaction) !void {
        // Validate read set (SSI)
        try self.validateReadSet();
        // Atomically commit write set
        try self.storage.atomicWrite(self.write_set);
    }
};
Serializable Snapshot Isolation:
// Detect read-write conflicts (the earlier comment said "write-write",
// but the check below compares versions this transaction READ against
// writes committed after its snapshot).
/// SSI validation: fails with `error.SerializationFailure` if any node in
/// the read set has a committed version newer than the transaction's
/// snapshot timestamp.
fn validateReadSet(txn: *Transaction) !void {
    // `std.AutoHashMap` exposes `iterator()`, not `.items()`.
    var it = txn.read_set.iterator();
    while (it.next()) |entry| {
        const current_version = try storage.getLatestVersion(entry.key_ptr.*);
        if (current_version.timestamp > txn.start_ts) {
            // Read-write conflict detected
            return error.SerializationFailure;
        }
    }
}
Index Implementation
B-tree Indexes (ISO §12):
// B-tree index for range queries
// Ordered index supporting both point lookups and range scans (ISO §12).
const BTreeIndex = struct {
    root: *Node,
    comparator: Comparator,

    /// Point lookup: walks from the root to a leaf, using the comparator
    /// to pick a child at each internal node, then returns the entry at
    /// the leaf position.
    /// NOTE(review): the leaf returns `node.values[pos]` as `[]NodeId` —
    /// confirm leaf slots hold id slices (duplicate keys) and that `pos`
    /// is a verified hit rather than an insertion point for a missing key.
    pub fn search(self: *BTreeIndex, key: Value) ![]NodeId {
        var node = self.root;
        while (true) {
            const pos = node.findPosition(key, self.comparator);
            if (node.is_leaf) {
                return node.values[pos];
            }
            node = node.children[pos];
        }
    }

    /// Range scan: seeks the iterator to `min` and caps it at `max`
    /// (inclusive or exclusive per `Iterator.setUpperBound` — not shown
    /// here; confirm). The caller drains the returned iterator.
    pub fn rangeQuery(
        self: *BTreeIndex,
        min: Value,
        max: Value,
    ) !Iterator {
        // Optimized range scanning
        var iter = Iterator.init(self);
        try iter.seekTo(min);
        iter.setUpperBound(max);
        return iter;
    }
};
Hash Indexes:
// Hash index for equality lookups
/// Unordered index for equality lookups (no range-scan support).
const HashIndex = struct {
    buckets: []Bucket,
    hasher: Hasher,

    /// Hashes the key, selects a bucket by modulo, and delegates the
    /// within-bucket probe to `Bucket.get`.
    pub fn lookup(self: *HashIndex, key: Value) ![]NodeId {
        const slot = self.hasher.hash(key) % self.buckets.len;
        return self.buckets[slot].get(key);
    }
};
Function Implementation (ISO §11)
Aggregation Functions:
// COUNT, SUM, AVG, MIN, MAX
/// Streaming accumulators for the ISO §11 aggregate functions
/// (COUNT, SUM, AVG, MIN, MAX, COLLECT). Inputs are folded in one at a
/// time via `accumulate`; `finalize` produces the result `Value`.
const AggregateFunction = union(enum) {
    count: CountAgg,
    sum: SumAgg,
    avg: AvgAgg,
    min: MinAgg,
    max: MaxAgg,
    collect: CollectAgg,

    /// Folds one input value into the accumulator.
    pub fn accumulate(self: *AggregateFunction, value: Value) !void {
        switch (self.*) {
            .count => |*agg| agg.count += 1,
            .sum => |*agg| agg.sum += value.asNumber(),
            .avg => |*agg| {
                agg.sum += value.asNumber();
                agg.count += 1;
            },
            // `@min`/`@max` only accept numeric/vector operands, not a
            // `Value` union, so compare through `asNumber` and keep the
            // original `Value` for `finalize`.
            // NOTE(review): assumes MinAgg.min / MaxAgg.max hold a
            // `Value` (finalize returns them as one) — confirm.
            .min => |*agg| {
                if (value.asNumber() < agg.min.asNumber()) agg.min = value;
            },
            .max => |*agg| {
                if (value.asNumber() > agg.max.asNumber()) agg.max = value;
            },
            .collect => |*agg| try agg.list.append(value),
        }
    }

    /// Produces the final aggregate result.
    pub fn finalize(self: *AggregateFunction) Value {
        return switch (self.*) {
            .count => |agg| Value.integer(agg.count),
            .sum => |agg| Value.float(agg.sum),
            // `@as` cannot convert a runtime integer to f64; the explicit
            // conversion is `@floatFromInt`.
            // NOTE(review): AVG over zero rows divides by zero — decide
            // whether that should yield NULL per ISO semantics.
            .avg => |agg| Value.float(agg.sum / @as(f64, @floatFromInt(agg.count))),
            .min => |agg| agg.min,
            .max => |agg| agg.max,
            .collect => |agg| Value.list(agg.list.items),
        };
    }
};
String Functions:
// UPPER, LOWER, TRIM, SUBSTRING, etc.
fn executeStringFunction(func: StringFunction, args: []Value) !Value {
return switch (func) {
.upper => Value.string(std.ascii.toUppercase(args[0].asString())),
.lower => Value.string(std.ascii.toLowercase(args[0].asString())),
.trim => Value.string(std.mem.trim(u8, args[0].asString(), " \t\n")),
.substring => {
const str = args[0].asString();
const start = args[1].asInteger();
const len = if (args.len > 2) args[2].asInteger() else str.len;
return Value.string(str[start..][0..len]);
},
// ... all ISO string functions
};
}
Testing Methodology
ISO Test Suite Integration:
// geode/tests/iso_compliance_test.zig
test "ISO §7.2: Node Pattern Matching" {
    // A property-qualified node pattern must return at least one row.
    // NOTE(review): relies on fixture data (a User with age 30) seeded
    // elsewhere — confirm the test harness creates it.
    const query = "MATCH (n:User {age: 30}) RETURN n";
    const result = try executeQuery(query);
    try testing.expect(result.rows.len > 0);
}
test "ISO §8.1: CREATE Statement" {
    // Round-trip: CREATE a node, then MATCH it back and check that the
    // property value survived storage and retrieval.
    const query = "CREATE (n:User {name: 'Alice', age: 30})";
    try executeQuery(query);
    // Verify created
    const verify = "MATCH (n:User {name: 'Alice'}) RETURN n.age";
    const result = try executeQuery(verify);
    try testing.expectEqual(@as(i64, 30), result.rows[0].get("n.age").integer);
}
Property-Based Testing:
// Verify properties hold for random inputs
test "Property: MATCH is idempotent" {
    // Fixed seed keeps the "random" queries reproducible across runs.
    var prng = std.rand.DefaultPrng.init(12345);
    const rand = prng.random();
    // Executing the same generated query twice must yield identical rows.
    for (0..1000) |_| {
        const query = generateRandomMatchQuery(rand);
        const first = try executeQuery(query);
        const second = try executeQuery(query);
        try testing.expectEqualSlices(Row, first.rows, second.rows);
    }
}
Performance Optimizations
Query Caching:
/// Cache of compiled queries keyed by the raw query text, so repeated
/// queries skip parsing/planning.
/// NOTE(review): `StringHashMap` does not copy its keys — the query
/// string must outlive the entry (or this type should dupe it).
const QueryCache = struct {
    cache: std.StringHashMap(CompiledQuery),

    /// Returns a pointer to the cached compiled query, or null on a miss.
    /// `HashMap.get` returns an optional *copy* (`?CompiledQuery`), which
    /// cannot coerce to the declared `?*CompiledQuery`; `getPtr` is the
    /// pointer-returning accessor.
    pub fn get(self: *QueryCache, query: []const u8) ?*CompiledQuery {
        return self.cache.getPtr(query);
    }

    /// Inserts or replaces the compiled form for `query`.
    pub fn put(self: *QueryCache, query: []const u8, compiled: CompiledQuery) !void {
        try self.cache.put(query, compiled);
    }
};
Parallel Execution:
// Execute independent operations in parallel
// NOTE(review): `async`/`await` require the old (pre-0.11) stage1 async
// machinery, which current Zig releases do not ship — this snippet will
// not compile on modern Zig. Confirm the targeted compiler version or
// port to std.Thread / a thread pool.
fn executeParallel(ops: []Operation) ![]Result {
    // One frame handle per operation; freed once all results are awaited.
    var tasks = try allocator.alloc(Task, ops.len);
    defer allocator.free(tasks);
    // Launch every operation without blocking.
    for (ops, 0..) |op, i| {
        tasks[i] = async executeOperation(op);
    }
    // Caller owns the returned results slice.
    // NOTE(review): if this alloc fails, the in-flight frames in `tasks`
    // are never awaited — confirm cleanup semantics.
    var results = try allocator.alloc(Result, ops.len);
    for (tasks, 0..) |task, i| {
        results[i] = await task;
    }
    return results;
}
Best Practices
- Follow ISO Sections: Map code to ISO §X.Y for traceability
- Test Coverage: Verify all ISO test cases pass
- Type Safety: Leverage Zig’s compile-time type checking
- Error Handling: Use ISO-standard error codes
- Documentation: Reference ISO sections in comments
- Performance: Optimize while maintaining correctness
- Regression Testing: Prevent standard violations
Related Topics
- ISO/IEC 39075:2024: Official standard specification
- GQL Compliance: Conformance testing
- GQL Reference: Language documentation
- Architecture: System design
Further Reading
- ISO/IEC 39075:2024 - Standard specification
- GQL Compliance - Conformance details
- GQL Reference - Language reference
- GQL Syntax - Syntax guide
- Testing - Test methodology
Implementing ISO/IEC 39075:2024 requires careful attention to specification details, comprehensive testing, and continuous validation against the conformance profile to ensure interoperability.