Comprehensive guide to Geode’s implementation of ISO/IEC 39075:2024 GQL standard, covering architecture, design decisions, optimization strategies, and development methodology for standards-compliant graph database systems.

Implementation Architecture

Query Processing Pipeline

┌─────────────┐
│ GQL Query   │
└──────┬──────┘
       v
┌─────────────┐
│ Lexer       │ Tokenize input
└──────┬──────┘
       v
┌─────────────┐
│ Parser      │ Build AST (Abstract Syntax Tree)
└──────┬──────┘
       v
┌─────────────┐
│ Semantic    │ Type checking, name resolution
│ Analyzer    │
└──────┬──────┘
       v
┌─────────────┐
│ Optimizer   │ Query optimization, rewriting
└──────┬──────┘
       v
┌─────────────┐
│ Executor    │ Execute plan
└──────┬──────┘
       v
┌─────────────┐
│ Results     │
└─────────────┘

Core Components

Lexer (gql/src/lexer.zig):

  • Tokenizes GQL input into lexical units
  • Handles keywords, identifiers, literals, operators
  • Implements ISO §5 (Lexical elements)

Parser (gql/src/parser.zig):

  • Constructs Abstract Syntax Tree (AST)
  • Validates grammar per ISO §7-9
  • Error recovery and reporting

AST (gql/src/ast.zig):

  • Represents parsed query structure
  • Node types for all GQL constructs
  • Visitor pattern for traversal

Type System (geode/src/query/types.zig):

  • Implements ISO §6 data types
  • Type inference and checking
  • Type coercion rules

Optimizer (geode/src/query/optimizer.zig):

  • Cost-based query optimization
  • Pattern rewriting
  • Index selection

Executor (geode/src/query/executor.zig):

  • Interprets execution plan
  • Accesses storage engine
  • Produces result streams

Standards Implementation

Lexical Elements (ISO §5)

Keywords:

// geode/src/query/lexer.zig
const keywords = std.StringHashMap(TokenType).init(&.{
    .{"MATCH", .MATCH},
    .{"CREATE", .CREATE},
    .{"WHERE", .WHERE},
    .{"RETURN", .RETURN},
    // ... all ISO-defined keywords
});

Identifiers:

// Supports both simple and quoted identifiers
fn scanIdentifier(self: *Lexer) Token {
    if (self.current == '`') {
        return self.scanQuotedIdentifier();
    }
    return self.scanSimpleIdentifier();
}

Literals:

// Integer, float, string, boolean, null
fn scanLiteral(self: *Lexer) Token {
    return switch (self.current) {
        '0'...'9' => self.scanNumber(),
        '\'' => self.scanString(),
        '"' => self.scanString(),
        else => self.scanKeywordOrIdentifier(),
    };
}

Data Types (ISO §6)

Scalar Types:

// geode/src/types.zig
const ValueType = union(enum) {
    null_type,
    boolean: bool,
    integer: i64,
    float: f64,
    decimal: Decimal,
    string: []const u8,
    bytes: []const u8,
    date: Date,
    time: Time,
    datetime: DateTime,
    duration: Duration,
    list: []Value,
    map: std.StringHashMap(Value),
    node: NodeRef,
    relationship: RelRef,
    path: Path,
};

Type Inference:

fn inferType(expr: *Expr, context: *TypeContext) !ValueType {
    return switch (expr.*) {
        .literal => inferLiteralType(expr.literal),
        .property => inferPropertyType(expr.property, context),
        .function => inferFunctionType(expr.function, context),
        .binary_op => inferBinaryOpType(expr.binary_op, context),
        // ... all expression types
    };
}

Pattern Matching (ISO §7)

Pattern Compilation:

// Convert pattern to execution plan
fn compilePattern(pattern: *Pattern) !ExecutionPlan {
    var plan = ExecutionPlan.init();

    for (pattern.elements) |element| {
        switch (element) {
            .node => try plan.addNodeScan(element.node),
            .relationship => try plan.addRelTraversal(element.relationship),
            .path => try plan.addPathMatch(element.path),
        }
    }

    return plan;
}

Variable-Length Path:

// Implements ISO §7.4 variable-length paths
fn matchVariablePath(
    start: NodeId,
    end: NodeId,
    rel_type: []const u8,
    min_hops: usize,
    max_hops: usize,
) ![]Path {
    var paths = std.ArrayList(Path).init(allocator);

    // Breadth-first search with hop limits
    var queue = Queue.init();
    queue.push(.{.node = start, .hops = 0, .path = Path.init()});

    while (queue.pop()) |state| {
        if (state.node == end and state.hops >= min_hops) {
            try paths.append(state.path);
        }

        if (state.hops < max_hops) {
            for (getOutgoingRels(state.node, rel_type)) |rel| {
                var new_path = state.path.clone();
                new_path.append(rel);
                queue.push(.{
                    .node = rel.end_node,
                    .hops = state.hops + 1,
                    .path = new_path,
                });
            }
        }
    }

    return paths.toOwnedSlice();
}

Query Optimization

Rule-Based Optimization:

// Apply optimization rules
const OptimizationRule = struct {
    name: []const u8,
    apply: *const fn(*QueryPlan) !bool,
};

const optimization_rules = [_]OptimizationRule{
    .{ .name = "push_down_filters", .apply = pushDownFilters },
    .{ .name = "merge_scans", .apply = mergeScans },
    .{ .name = "index_selection", .apply = selectIndexes },
    .{ .name = "join_reordering", .apply = reorderJoins },
};

fn optimize(plan: *QueryPlan) !void {
    for (optimization_rules) |rule| {
        var changed = true;
        while (changed) {
            changed = try rule.apply(plan);
        }
    }
}

Cost-Based Optimization:

// Estimate execution cost
fn estimateCost(plan: *ExecutionPlan, stats: *Statistics) f64 {
    var total_cost: f64 = 0.0;

    for (plan.operators) |op| {
        total_cost += switch (op) {
            .scan => estimateScanCost(op.scan, stats),
            .filter => estimateFilterCost(op.filter, stats),
            .join => estimateJoinCost(op.join, stats),
            .aggregate => estimateAggregateCost(op.aggregate, stats),
        };
    }

    return total_cost;
}

Transaction Management (ISO §13)

MVCC Implementation:

// Multi-Version Concurrency Control
const Transaction = struct {
    id: TxnId,
    start_ts: Timestamp,
    isolation: IsolationLevel,
    read_set: std.AutoHashMap(NodeId, Version),
    write_set: std.AutoHashMap(NodeId, Value),

    pub fn read(self: *Transaction, node_id: NodeId) !Value {
        // Check write set first
        if (self.write_set.get(node_id)) |value| {
            return value;
        }

        // Read from snapshot
        const version = try self.storage.getVersion(node_id, self.start_ts);
        try self.read_set.put(node_id, version);
        return version.value;
    }

    pub fn write(self: *Transaction, node_id: NodeId, value: Value) !void {
        try self.write_set.put(node_id, value);
    }

    pub fn commit(self: *Transaction) !void {
        // Validate read set (SSI)
        try self.validateReadSet();

        // Atomically commit write set
        try self.storage.atomicWrite(self.write_set);
    }
};

Serializable Snapshot Isolation:

// Detect write-write conflicts
fn validateReadSet(txn: *Transaction) !void {
    for (txn.read_set.items()) |entry| {
        const current_version = try storage.getLatestVersion(entry.key);

        if (current_version.timestamp > txn.start_ts) {
            // Read-write conflict detected
            return error.SerializationFailure;
        }
    }
}

Index Implementation

B-tree Indexes (ISO §12):

// B-tree index for range queries
const BTreeIndex = struct {
    root: *Node,
    comparator: Comparator,

    pub fn search(self: *BTreeIndex, key: Value) ![]NodeId {
        var node = self.root;

        while (true) {
            const pos = node.findPosition(key, self.comparator);

            if (node.is_leaf) {
                return node.values[pos];
            }

            node = node.children[pos];
        }
    }

    pub fn rangeQuery(
        self: *BTreeIndex,
        min: Value,
        max: Value,
    ) !Iterator {
        // Optimized range scanning
        var iter = Iterator.init(self);
        try iter.seekTo(min);
        iter.setUpperBound(max);
        return iter;
    }
};

Hash Indexes:

// Hash index for equality lookups
const HashIndex = struct {
    buckets: []Bucket,
    hasher: Hasher,

    pub fn lookup(self: *HashIndex, key: Value) ![]NodeId {
        const hash = self.hasher.hash(key);
        const bucket = &self.buckets[hash % self.buckets.len];
        return bucket.get(key);
    }
};

Function Implementation (ISO §11)

Aggregation Functions:

// COUNT, SUM, AVG, MIN, MAX
const AggregateFunction = union(enum) {
    count: CountAgg,
    sum: SumAgg,
    avg: AvgAgg,
    min: MinAgg,
    max: MaxAgg,
    collect: CollectAgg,

    pub fn accumulate(self: *AggregateFunction, value: Value) !void {
        switch (self.*) {
            .count => |*agg| agg.count += 1,
            .sum => |*agg| agg.sum += value.asNumber(),
            .avg => |*agg| {
                agg.sum += value.asNumber();
                agg.count += 1;
            },
            .min => |*agg| agg.min = @min(agg.min, value),
            .max => |*agg| agg.max = @max(agg.max, value),
            .collect => |*agg| try agg.list.append(value),
        }
    }

    pub fn finalize(self: *AggregateFunction) Value {
        return switch (self.*) {
            .count => |agg| Value.integer(agg.count),
            .sum => |agg| Value.float(agg.sum),
            .avg => |agg| Value.float(agg.sum / @as(f64, agg.count)),
            .min => |agg| agg.min,
            .max => |agg| agg.max,
            .collect => |agg| Value.list(agg.list.items),
        };
    }
};

String Functions:

// UPPER, LOWER, TRIM, SUBSTRING, etc.
fn executeStringFunction(func: StringFunction, args: []Value) !Value {
    return switch (func) {
        .upper => Value.string(std.ascii.toUppercase(args[0].asString())),
        .lower => Value.string(std.ascii.toLowercase(args[0].asString())),
        .trim => Value.string(std.mem.trim(u8, args[0].asString(), " \t\n")),
        .substring => {
            const str = args[0].asString();
            const start = args[1].asInteger();
            const len = if (args.len > 2) args[2].asInteger() else str.len;
            return Value.string(str[start..][0..len]);
        },
        // ... all ISO string functions
    };
}

Testing Methodology

ISO Test Suite Integration:

// geode/tests/iso_compliance_test.zig
test "ISO §7.2: Node Pattern Matching" {
    const query = "MATCH (n:User {age: 30}) RETURN n";
    const result = try executeQuery(query);
    try testing.expect(result.rows.len > 0);
}

test "ISO §8.1: CREATE Statement" {
    const query = "CREATE (n:User {name: 'Alice', age: 30})";
    try executeQuery(query);

    // Verify created
    const verify = "MATCH (n:User {name: 'Alice'}) RETURN n.age";
    const result = try executeQuery(verify);
    try testing.expectEqual(@as(i64, 30), result.rows[0].get("n.age").integer);
}

Property-Based Testing:

// Verify properties hold for random inputs
test "Property: MATCH is idempotent" {
    var prng = std.rand.DefaultPrng.init(12345);
    const rand = prng.random();

    var i: usize = 0;
    while (i < 1000) : (i += 1) {
        const query = generateRandomMatchQuery(rand);

        const result1 = try executeQuery(query);
        const result2 = try executeQuery(query);

        try testing.expectEqualSlices(Row, result1.rows, result2.rows);
    }
}

Performance Optimizations

Query Caching:

const QueryCache = struct {
    cache: std.StringHashMap(CompiledQuery),

    pub fn get(self: *QueryCache, query: []const u8) ?*CompiledQuery {
        return self.cache.get(query);
    }

    pub fn put(self: *QueryCache, query: []const u8, compiled: CompiledQuery) !void {
        try self.cache.put(query, compiled);
    }
};

Parallel Execution:

// Execute independent operations in parallel
fn executeParallel(ops: []Operation) ![]Result {
    var tasks = try allocator.alloc(Task, ops.len);
    defer allocator.free(tasks);

    for (ops, 0..) |op, i| {
        tasks[i] = async executeOperation(op);
    }

    var results = try allocator.alloc(Result, ops.len);
    for (tasks, 0..) |task, i| {
        results[i] = await task;
    }

    return results;
}

Best Practices

  1. Follow ISO Sections: Map code to ISO §X.Y for traceability
  2. Test Coverage: Verify all ISO test cases pass
  3. Type Safety: Leverage Zig’s compile-time type checking
  4. Error Handling: Use ISO-standard error codes
  5. Documentation: Reference ISO sections in comments
  6. Performance: Optimize while maintaining correctness
  7. Regression Testing: Prevent standard violations
  • ISO/IEC 39075:2024: Official standard specification
  • GQL Compliance: Conformance testing
  • GQL Reference: Language documentation
  • Architecture: System design

Further Reading

Implementing ISO/IEC 39075:2024 requires careful attention to specification details, comprehensive testing, and continuous validation against the conformance profile to ensure interoperability.


Related Articles