<!-- CANARY: REQ=REQ-GQL-024; FEATURE="Conformance Flagger"; ASPECT=ImplDefinedCollationAndNullOrdering; STATUS=TESTED; OWNER=engine; UPDATED=2025-09-21 --> <h3 id="overview" class="position-relative d-flex align-items-center group"> <span>Overview</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="overview" aria-haspopup="dialog" aria-label="Share link: Overview"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3><div id="headingShareModal" class="heading-share-modal" role="dialog" aria-modal="true" aria-labelledby="headingShareTitle" hidden> <div class="hsm-dialog" role="document"> <div class="hsm-header"> <h2 id="headingShareTitle" class="h6 mb-0 fw-bold">Share this section</h2> <button type="button" class="hsm-close" aria-label="Close"> <i class="fa-solid fa-xmark"></i> </button> </div> <div class="hsm-body"> <label for="headingShareInput" class="form-label small text-muted mb-1 text-uppercase fw-bold" style="font-size: 0.7rem; letter-spacing: 0.5px;">Permalink</label> <div class="input-group mb-4 hsm-url-group"> <input id="headingShareInput" type="text" class="form-control font-monospace" readonly aria-readonly="true" style="font-size: 0.85rem;" /> <button class="btn btn-primary hsm-copy" type="button" aria-label="Copy" title="Copy"> <i class="fa-duotone fa-clipboard" aria-hidden="true"></i> </button> </div> <div class="small fw-bold mb-2 text-muted text-uppercase" style="font-size: 0.7rem; letter-spacing: 0.5px;">Share via</div> <div class="hsm-share-grid"> <a id="share-twitter" class="btn btn-outline-secondary w-100" target="_blank" rel="noopener noreferrer"> <i class="fa-brands fa-twitter me-2"></i>Twitter </a> <a id="share-linkedin" class="btn btn-outline-secondary w-100" target="_blank" rel="noopener noreferrer"> <i class="fa-brands fa-linkedin me-2"></i>LinkedIn </a> <a id="share-facebook" class="btn btn-outline-secondary w-100" target="_blank" rel="noopener noreferrer"> <i class="fa-brands fa-facebook me-2"></i>Facebook </a> </div> </div> </div> </div> <style> .heading-share-modal { position: fixed; inset: 0; display: flex; justify-content: center; align-items: center; background: rgba(0, 0, 0, 0.6); z-index: 1050; padding: 1rem; backdrop-filter: blur(4px); -webkit-backdrop-filter: blur(4px); } .heading-share-modal[hidden] { display: none !important; } .hsm-dialog { max-width: 420px; width: 100%; background: var(--bs-body-bg, #fff); color: var(--bs-body-color, #212529); border: 1px solid var(--bs-border-color, rgba(0,0,0,0.1)); border-radius: 1rem; box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.25); overflow: hidden; animation: hsm-fade-in 0.2s ease-out; } @keyframes hsm-fade-in { from { opacity: 0; transform: scale(0.95); } to { opacity: 1; transform: scale(1); } } [data-bs-theme="dark"] .hsm-dialog { background: #1e293b; border-color: rgba(255,255,255,0.1); color: #f8f9fa; } .hsm-header { display: flex; justify-content: space-between; align-items: center; padding: 1rem 1.5rem; border-bottom: 1px solid var(--bs-border-color, rgba(0,0,0,0.1)); background: rgba(0,0,0,0.02); } [data-bs-theme="dark"] .hsm-header { background: rgba(255,255,255,0.02); border-color: rgba(255,255,255,0.1); } .hsm-close { background: transparent; border: none; color: inherit; opacity: 0.5; padding: 0.25rem 0.5rem; border-radius: 0.25rem; font-size: 1.2rem; line-height: 1; transition: opacity 0.2s; } .hsm-close:hover { opacity: 1; } .hsm-body { padding: 1.5rem; } .hsm-url-group { display: flex !important; align-items: stretch; } .hsm-url-group .form-control { flex: 1; min-width: 0; margin: 0; background: var(--bs-secondary-bg, #f8f9fa); border-color: var(--bs-border-color, #dee2e6); border-top-right-radius: 0; border-bottom-right-radius: 0; height: 42px; } .hsm-url-group .btn { flex: 0 0 auto; margin: 0; margin-left: -1px; border-top-left-radius: 0; border-bottom-left-radius: 0; height: 42px; display: flex; align-items: center; justify-content: center; padding: 0 1.25rem; z-index: 2; } [data-bs-theme="dark"] .hsm-url-group .form-control { background: #0f172a; border-color: #334155; color: #e2e8f0; } .hsm-share-grid { display: flex; flex-direction: column; gap: 0.5rem; } .hsm-share-grid .btn { display: flex; align-items: center; justify-content: center; font-size: 0.9rem; padding: 0.6rem; border-color: var(--bs-border-color); width: 100%; } [data-bs-theme="dark"] .hsm-share-grid .btn { color: #e2e8f0; border-color: #475569; } [data-bs-theme="dark"] .hsm-share-grid .btn:hover { background: #334155; border-color: #cbd5e1; } </style> <script> (function(){ const modal = document.getElementById('headingShareModal'); if(!modal) return; const input = modal.querySelector('#headingShareInput'); const copyBtn = modal.querySelector('.hsm-copy'); const twitter = modal.querySelector('#share-twitter'); const linkedin = modal.querySelector('#share-linkedin'); const facebook = modal.querySelector('#share-facebook'); const closeBtn = modal.querySelector('.hsm-close'); let lastFocus=null; let trapBound=false; function buildUrl(id){ return window.location.origin + window.location.pathname + '#' + id; } function isOpen(){ return !modal.hasAttribute('hidden'); } function hydrate(id){ const url=buildUrl(id); input.value=url; const enc=encodeURIComponent(url); const text=encodeURIComponent(document.title); if(twitter) twitter.href=`https://twitter.com/intent/tweet?url=${enc}&text=${text}`; if(linkedin) linkedin.href=`https://www.linkedin.com/sharing/share-offsite/?url=${enc}`; if(facebook) facebook.href=`https://www.facebook.com/sharer/sharer.php?u=${enc}`; } function openModal(id){ lastFocus=document.activeElement; hydrate(id); if(!isOpen()){ modal.removeAttribute('hidden'); } requestAnimationFrame(()=>{ input.focus(); }); trapFocus(); } function closeModal(){ if(!isOpen()) return; modal.setAttribute('hidden',''); if(lastFocus && typeof lastFocus.focus==='function') lastFocus.focus(); } function copyCurrent(){ try{ navigator.clipboard.writeText(input.value).then(()=>feedback(true),()=>fallback()); } catch(e){ fallback(); } } function fallback(){ input.select(); try{ document.execCommand('copy'); feedback(true);}catch(e){ feedback(false);} } function feedback(ok){ if(!copyBtn) return; const icon=copyBtn.querySelector('i'); if(!icon) return; const prev=copyBtn.getAttribute('data-prev')||icon.className; if(!copyBtn.getAttribute('data-prev')) copyBtn.setAttribute('data-prev',prev); icon.className= ok ? 'fa-duotone fa-clipboard-check':'fa-duotone fa-circle-exclamation'; setTimeout(()=>{ icon.className=prev; },1800); } function handleShareClick(e){ e.preventDefault(); const btn=e.currentTarget; const id=btn.getAttribute('data-share-target'); if(id) openModal(id); } function bindShareButtons(){ document.querySelectorAll('.h-share').forEach(btn=>{ if(!btn.dataset.hShareBound){ btn.addEventListener('click', handleShareClick); btn.dataset.hShareBound='1'; } }); } bindShareButtons(); if(document.readyState==='loading'){ document.addEventListener('DOMContentLoaded', bindShareButtons); } else { requestAnimationFrame(bindShareButtons); } document.addEventListener('click', function(e){ const shareBtn=e.target.closest && e.target.closest('.h-share'); if(shareBtn && !shareBtn.dataset.hShareBound){ handleShareClick.call(shareBtn, e); } }, true); document.addEventListener('click', e=>{ if(e.target===modal) closeModal(); if(e.target.closest && e.target.closest('.hsm-close')){ e.preventDefault(); closeModal(); } if(copyBtn && (e.target===copyBtn || (e.target.closest && e.target.closest('.hsm-copy')))) { e.preventDefault(); copyCurrent(); } }); document.addEventListener('keydown', e=>{ if(e.key==='Escape' && isOpen()) closeModal(); }); function trapFocus(){ if(trapBound) return; trapBound=true; modal.addEventListener('keydown', f=>{ if(f.key==='Tab' && isOpen()){ const focusable=[...modal.querySelectorAll('a[href],button,input,textarea,select,[tabindex]:not([tabindex="-1"])')].filter(el=>!el.hasAttribute('disabled')); if(!focusable.length) return; const first=focusable[0]; const last=focusable[focusable.length-1]; if(f.shiftKey && document.activeElement===first){ f.preventDefault(); last.focus(); } else if(!f.shiftKey && document.activeElement===last){ f.preventDefault(); first.focus(); } } }); } if(closeBtn) closeBtn.addEventListener('click', e=>{ e.preventDefault(); closeModal(); }); })(); </script><p>Geode provides enterprise-grade BM25 scoring integration with the IndexOptimizer, enabling sophisticated full-text search optimization with intelligent cost estimation and query planning. This implementation rivals commercial search engines while remaining aligned with the ISO GQL conformance profile.</p> <h4 id="what-is-bm25" class="position-relative d-flex align-items-center group"> <span>What is BM25?</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="what-is-bm25" aria-haspopup="dialog" aria-label="Share link: What is BM25?"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><p><strong>BM25</strong> (Best Matching 25) is a probabilistic relevance ranking function used by search engines to estimate the relevance of documents to a given search query. It&rsquo;s the industry standard for full-text search, used by Elasticsearch, Apache Solr, and modern database systems.</p> <p><strong>Key Advantages</strong>:</p> <ul> <li><strong>Relevance Scoring</strong>: Returns results ordered by relevance, not just term matching</li> <li><strong>Corpus-Aware</strong>: Considers document length and term frequency across the entire collection</li> <li><strong>Tunable Parameters</strong>: Adjustable for different content types and search scenarios</li> <li><strong>Production-Proven</strong>: Decades of research and real-world deployment</li> </ul> <h3 id="bm25-mathematical-foundation" class="position-relative d-flex align-items-center group"> <span>BM25 Mathematical Foundation</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="bm25-mathematical-foundation" aria-haspopup="dialog" aria-label="Share link: BM25 Mathematical Foundation"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3> <h4 id="the-bm25-formula" class="position-relative d-flex align-items-center group"> <span>The BM25 Formula</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="the-bm25-formula" aria-haspopup="dialog" aria-label="Share link: The BM25 Formula"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-fallback" data-lang="fallback"><span class="line"><span class="cl">score(q,d) = Σ IDF(qi) × [f(qi,d) × (k1 + 1)] / [f(qi,d) + k1 × (1 - b + b × |d| / avgdl)] </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl">Where: </span></span><span class="line"><span class="cl"> - IDF(qi) = log((N - df(qi) + 0.5) / (df(qi) + 0.5)) </span></span><span class="line"><span class="cl"> - f(qi,d) = term frequency of qi in document d </span></span><span class="line"><span class="cl"> - |d| = document length in words </span></span><span class="line"><span class="cl"> - avgdl = average document length in collection </span></span><span class="line"><span class="cl"> - k1 = 1.2 (term frequency saturation parameter) </span></span><span class="line"><span class="cl"> - b = 0.75 (length normalization parameter) </span></span><span class="line"><span class="cl"> - N = total number of documents </span></span><span class="line"><span class="cl"> - df(qi) = number of documents containing qi </span></span></code></pre></div> <h4 id="components-explained" class="position-relative d-flex align-items-center group"> <span>Components Explained</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="components-explained" aria-haspopup="dialog" aria-label="Share link: Components Explained"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><p><strong>IDF (Inverse Document Frequency)</strong>:</p> <ul> <li>Measures how rare or common a term is across the entire corpus</li> <li>Rare terms have higher IDF scores (more discriminating)</li> <li>Common terms like &ldquo;the&rdquo; have low IDF scores (less useful for ranking)</li> </ul> <p><strong>Term Frequency Saturation (k1)</strong>:</p> <ul> <li>Controls how quickly term frequency score saturates</li> <li>k1 = 1.2 is standard (OWASP recommendation)</li> <li>Higher k1 = term frequency has more impact</li> <li>Lower k1 = diminishing returns on repeated terms</li> </ul> <p><strong>Length Normalization (b)</strong>:</p> <ul> <li>Controls how much document length affects scoring</li> <li>b = 0.75 balances between penalizing long documents and ignoring length</li> <li>b = 0: No length normalization</li> <li>b = 1: Full length normalization</li> </ul> <h3 id="implementation-architecture" class="position-relative d-flex align-items-center group"> <span>Implementation Architecture</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="implementation-architecture" aria-haspopup="dialog" aria-label="Share link: Implementation Architecture"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3> <h4 id="core-integration" class="position-relative d-flex align-items-center group"> <span>Core Integration</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="core-integration" aria-haspopup="dialog" aria-label="Share link: Core Integration"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><p>Geode integrates BM25 scoring directly into the IndexOptimizer for cost-based query planning:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-zig" data-lang="zig"><span class="line"><span class="cl"><span class="c1">// src/server/index_optimizer.zig </span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="k">fn</span><span class="w"> </span><span class="n">estimateBM25FulltextCost</span><span class="p">(</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="n">self</span><span class="o">:</span><span class="w"> </span><span class="o">*</span><span class="n">IndexOptimizer</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="n">query_terms</span><span class="o">:</span><span class="w"> </span><span class="p">[]</span><span class="kr">const</span><span class="w"> </span><span class="p">[]</span><span class="kr">const</span><span class="w"> </span><span class="kt">u8</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="n">index_name</span><span class="o">:</span><span class="w"> </span><span class="p">[]</span><span class="kr">const</span><span class="w"> </span><span class="kt">u8</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="n">corpus_size</span><span class="o">:</span><span class="w"> </span><span class="kt">u64</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="p">)</span><span class="w"> </span><span class="kt">f64</span><span class="w"> </span><span class="p">{</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="c1">// BM25 parameters (industry standard) </span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="w"> </span><span class="kr">const</span><span class="w"> </span><span class="n">k1</span><span class="o">:</span><span class="w"> </span><span class="kt">f64</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mf">1.2</span><span class="p">;</span><span class="w"> </span><span class="c1">// Term frequency saturation </span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="w"> </span><span class="kr">const</span><span class="w"> </span><span class="n">b</span><span class="o">:</span><span class="w"> </span><span class="kt">f64</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mf">0.75</span><span class="p">;</span><span class="w"> </span><span class="c1">// Length normalization </span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="c1">// Base computational cost </span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="w"> </span><span class="kr">var</span><span class="w"> </span><span class="n">base_cost</span><span class="o">:</span><span class="w"> </span><span class="kt">f64</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mf">25.0</span><span class="p">;</span><span class="w"> </span><span class="c1">// Higher than basic fulltext (20.0) </span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="c1">// Query complexity factor </span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="w"> </span><span class="kr">const</span><span class="w"> </span><span class="n">query_complexity</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mf">1.0</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="p">(</span><span class="nb">@as</span><span class="p">(</span><span class="kt">f64</span><span class="p">,</span><span class="w"> </span><span class="nb">@floatFromInt</span><span class="p">(</span><span class="n">query_terms</span><span class="p">.</span><span class="n">len</span><span class="p">))</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="mf">1.0</span><span class="p">)</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="mf">0.3</span><span class="p">;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="n">base_cost</span><span class="w"> </span><span class="o">*=</span><span class="w"> </span><span class="n">query_complexity</span><span class="p">;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="c1">// Corpus size logarithmic scaling </span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="w"> </span><span class="kr">const</span><span class="w"> </span><span class="n">corpus_factor</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mf">1.0</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="nb">@log</span><span class="p">(</span><span class="nb">@as</span><span class="p">(</span><span class="kt">f64</span><span class="p">,</span><span class="w"> </span><span class="nb">@floatFromInt</span><span class="p">(</span><span class="n">corpus_size</span><span class="p">)))</span><span class="w"> </span><span class="o">/</span><span class="w"> </span><span class="mf">10.0</span><span class="p">;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="n">base_cost</span><span class="w"> </span><span class="o">*=</span><span class="w"> </span><span class="n">corpus_factor</span><span class="p">;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">base_cost</span><span class="p">;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="p">}</span><span class="w"> </span></span></span></code></pre></div> <h4 id="statistics-driven-optimization" class="position-relative d-flex align-items-center group"> <span>Statistics-Driven Optimization</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="statistics-driven-optimization" aria-haspopup="dialog" aria-label="Share link: Statistics-Driven Optimization"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><p><strong>Enhanced Cost Estimation</strong> using corpus statistics:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-zig" data-lang="zig"><span class="line"><span class="cl"><span class="c1">// Vocabulary density factor </span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="kr">const</span><span class="w"> </span><span class="n">vocab_density</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">@as</span><span class="p">(</span><span class="kt">f64</span><span class="p">,</span><span class="w"> </span><span class="nb">@floatFromInt</span><span class="p">(</span><span class="n">fts_vocabulary_size</span><span class="p">))</span><span class="w"> </span><span class="o">/</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nb">@as</span><span class="p">(</span><span class="kt">f64</span><span class="p">,</span><span class="w"> </span><span class="nb">@floatFromInt</span><span class="p">(</span><span class="n">fts_total_documents</span><span class="p">));</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">vocab_density</span><span class="w"> </span><span class="o">&gt;</span><span class="w"> </span><span class="mf">100.0</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="n">bm25_cost_factor</span><span class="w"> </span><span class="o">*=</span><span class="w"> </span><span class="mf">1.2</span><span class="p">;</span><span class="w"> </span><span class="c1">// Complex vocabulary = higher IDF cost </span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="p">}</span><span class="w"> </span><span class="k">else</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">vocab_density</span><span class="w"> </span><span class="o">&lt;</span><span class="w"> </span><span class="mf">20.0</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="n">bm25_cost_factor</span><span class="w"> </span><span class="o">*=</span><span class="w"> </span><span class="mf">0.9</span><span class="p">;</span><span class="w"> </span><span class="c1">// Simple vocabulary = lower IDF cost </span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="p">}</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="c1">// Document length normalization cost </span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="kr">const</span><span class="w"> </span><span class="n">length_norm_cost</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mf">1.0</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="p">(</span><span class="n">fts_avg_document_length</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="mf">200.0</span><span class="p">)</span><span class="w"> </span><span class="o">/</span><span class="w"> </span><span class="mf">1000.0</span><span class="p">;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="n">bm25_cost_factor</span><span class="w"> </span><span class="o">*=</span><span class="w"> </span><span class="nb">@max</span><span class="p">(</span><span class="mf">0.8</span><span class="p">,</span><span class="w"> </span><span class="nb">@min</span><span class="p">(</span><span class="mf">1.5</span><span class="p">,</span><span class="w"> </span><span class="n">length_norm_cost</span><span class="p">));</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="c1">// Historical performance adaptation </span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">fts_search_queries</span><span class="w"> </span><span class="o">&gt;</span><span class="w"> </span><span class="mi">5</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="kr">const</span><span class="w"> </span><span class="n">bm25_efficiency</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">hit_ratio</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="mf">0.4</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mf">0.6</span><span class="p">;</span><span class="w"> </span><span class="c1">// Between 0.6-1.0 </span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="w"> </span><span class="n">base_cost</span><span class="w"> </span><span class="o">*=</span><span class="w"> </span><span class="n">bm25_efficiency</span><span class="p">;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="p">}</span><span class="w"> </span></span></span></code></pre></div> <h3 id="creating-full-text-indexes" class="position-relative d-flex align-items-center group"> <span>Creating Full-Text Indexes</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="creating-full-text-indexes" aria-haspopup="dialog" aria-label="Share link: Creating Full-Text Indexes"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3> <h4 id="basic-full-text-index" class="position-relative d-flex align-items-center group"> <span>Basic Full-Text Index</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="basic-full-text-index" aria-haspopup="dialog" aria-label="Share link: Basic Full-Text Index"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Create</span><span class="w"> </span><span class="py">full</span><span class="err">-</span><span class="py">text</span><span class="w"> </span><span class="py">index</span><span class="w"> </span><span class="kd">on</span><span class="w"> </span><span class="py">article</span><span class="w"> </span><span class="py">content</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">CREATE</span><span class="w"> </span><span class="py">INDEX</span><span class="w"> </span><span class="py">article_content_idx</span><span class="w"> </span><span class="py">ON</span><span class="w"> </span><span class="py">Article</span><span class="w"> </span><span class="p">(</span><span class="py">content</span><span class="p">)</span><span class="w"> </span><span class="py">USING</span><span class="w"> </span><span class="py">fulltext</span><span class="w"> </span></span></span></code></pre></div><p><strong>Properties</strong>:</p> <ul> <li>Automatically enables BM25-optimized cost estimation</li> <li>Tokenizes content using standard text analyzer</li> <li>Builds inverted index for fast term lookup</li> <li>Stores document frequency statistics</li> </ul> <h4 id="multi-field-index" class="position-relative d-flex align-items-center group"> <span>Multi-Field Index</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="multi-field-index" aria-haspopup="dialog" aria-label="Share link: Multi-Field Index"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Index</span><span class="w"> </span><span class="py">multiple</span><span class="w"> </span><span class="py">text</span><span class="w"> </span><span class="py">fields</span><span class="w"> </span><span class="py">together</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">CREATE</span><span class="w"> </span><span class="py">INDEX</span><span class="w"> </span><span class="py">article_search_idx</span><span class="w"> </span><span class="py">ON</span><span class="w"> </span><span class="py">Article</span><span class="w"> </span><span class="p">(</span><span class="py">title</span><span class="p">,</span><span class="w"> </span><span class="py">abstract</span><span class="p">,</span><span class="w"> </span><span class="py">content</span><span class="p">)</span><span class="w"> </span><span class="py">USING</span><span class="w"> </span><span class="py">fulltext</span><span class="w"> </span></span></span></code></pre></div><p><strong>Use Cases</strong>:</p> <ul> <li>Search across all text fields simultaneously</li> <li>Weighted scoring (title matches rank higher)</li> <li>Comprehensive document search</li> </ul> <h4 id="custom-analyzer-configuration" class="position-relative d-flex align-items-center group"> <span>Custom Analyzer Configuration</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="custom-analyzer-configuration" aria-haspopup="dialog" aria-label="Share link: Custom Analyzer Configuration"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-yaml" data-lang="yaml"><span class="line"><span class="cl"><span class="c"># config/fulltext.yaml</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="nt">analyzers</span><span class="p">:</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">tokenizer</span><span class="p">:</span><span class="w"> </span><span class="l">standard</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">filters</span><span class="p">:</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span>- <span class="l">lowercase</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span>- <span class="l">stop_words</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span>- <span class="l">stemming</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">technical</span><span class="p">:</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">tokenizer</span><span class="p">:</span><span class="w"> </span><span class="l">whitespace</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">filters</span><span class="p">:</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span>- <span class="l">lowercase</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="c"># No stemming for technical terms</span><span class="w"> </span></span></span></code></pre></div> <h3 id="query-syntax" class="position-relative d-flex align-items-center group"> <span>Query Syntax</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="query-syntax" aria-haspopup="dialog" aria-label="Share link: Query Syntax"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3> <h4 id="basic-text-search" class="position-relative d-flex align-items-center group"> <span>Basic Text Search</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="basic-text-search" aria-haspopup="dialog" aria-label="Share link: Basic Text Search"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Search</span><span class="w"> </span><span class="py">for</span><span class="w"> </span><span class="py">single</span><span class="w"> </span><span class="py">term</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">article</span><span class="p">:</span><span class="nc">Article</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">article</span><span class="err">.</span><span class="py">content</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">machine</span><span class="w"> </span><span class="py">learning</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">article</span><span class="err">.</span><span class="py">title</span><span class="p">,</span><span class="w"> </span><span class="py">article</span><span class="err">.</span><span class="py">author</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">article</span><span class="err">.</span><span class="py">relevance_score</span><span class="w"> </span><span class="py">DESC</span><span class="w"> </span></span></span></code></pre></div><p><strong>BM25 Behavior</strong>:</p> <ul> <li>Automatically uses BM25 for relevance scoring</li> <li>Returns results ordered by relevance</li> <li>Considers term frequency and document length</li> </ul> <h4 id="multi-term-search" class="position-relative d-flex align-items-center group"> <span>Multi-Term Search</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="multi-term-search" aria-haspopup="dialog" aria-label="Share link: Multi-Term Search"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Search</span><span class="w"> </span><span class="py">for</span><span class="w"> </span><span class="py">multiple</span><span class="w"> </span><span class="py">terms</span><span class="w"> </span><span class="p">(</span><span class="py">AND</span><span class="w"> </span><span class="py">logic</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">doc</span><span class="p">:</span><span class="nc">Document</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">abstract</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">artificial</span><span class="w"> </span><span class="py">intelligence</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">keywords</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">neural</span><span class="w"> </span><span class="py">networks</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">title</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">bm25_score</span><span class="p">(</span><span class="py">doc</span><span class="err">.</span><span class="py">abstract</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">artificial</span><span class="w"> </span><span class="py">intelligence</span><span class="w"> </span><span class="py">neural</span><span class="w"> </span><span class="py">networks</span><span class="err">&#39;</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">relevance</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">relevance</span><span class="w"> </span><span class="py">DESC</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">LIMIT</span><span class="w"> </span><span class="py">10</span><span class="w"> </span></span></span></code></pre></div><p><strong>Query Complexity</strong>:</p> <ul> <li>Each additional term increases cost by 30%</li> <li>BM25 scores combine across all terms</li> <li>More selective terms rank higher</li> </ul> <h4 id="phrase-search" class="position-relative d-flex align-items-center group"> <span>Phrase Search</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="phrase-search" aria-haspopup="dialog" aria-label="Share link: Phrase Search"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Exact</span><span class="w"> </span><span class="py">phrase</span><span class="w"> </span><span class="py">matching</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">article</span><span class="p">:</span><span class="nc">Article</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">article</span><span class="err">.</span><span class="py">content</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="s">&#34;graph database&#34;</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">article</span><span class="err">.</span><span class="py">title</span><span class="w"> </span></span></span></code></pre></div><p><strong>Phrase Matching</strong>:</p> <ul> <li>Terms must appear in exact order</li> <li>Higher precision, lower recall</li> <li>Useful for technical terms and proper nouns</li> </ul> <h4 id="boolean-operators" class="position-relative d-flex align-items-center group"> <span>Boolean Operators</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="boolean-operators" aria-haspopup="dialog" aria-label="Share link: Boolean Operators"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Complex</span><span class="w"> </span><span class="py">boolean</span><span class="w"> </span><span class="py">queries</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">doc</span><span class="p">:</span><span class="nc">Document</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">text</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">database</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="p">(</span><span class="py">doc</span><span class="err">.</span><span class="py">text</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">graph</span><span class="err">&#39;</span><span class="w"> </span><span class="py">OR</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">text</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">network</span><span class="err">&#39;</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">text</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">relational</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">title</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">bm25_score</span><span class="p">(</span><span class="py">doc</span><span class="err">.</span><span class="py">text</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">database</span><span class="w"> </span><span class="py">graph</span><span class="w"> </span><span class="py">network</span><span class="err">&#39;</span><span class="p">)</span><span class="w"> </span><span class="py">DESC</span><span class="w"> </span></span></span></code></pre></div> <h3 id="corpus-aware-optimization" class="position-relative d-flex align-items-center group"> <span>Corpus-Aware Optimization</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="corpus-aware-optimization" aria-haspopup="dialog" aria-label="Share link: Corpus-Aware Optimization"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3> <h4 id="vocabulary-density-adaptation" class="position-relative d-flex align-items-center group"> <span>Vocabulary Density Adaptation</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="vocabulary-density-adaptation" aria-haspopup="dialog" aria-label="Share link: Vocabulary Density Adaptation"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><p>Geode automatically adjusts BM25 costs based on corpus characteristics:</p> <p><strong>Technical Documentation</strong> (high vocabulary density):</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Complex</span><span class="w"> </span><span class="py">terminology</span><span class="p">,</span><span class="w"> </span><span class="py">specialized</span><span class="w"> </span><span class="py">vocabulary</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">tech_doc</span><span class="p">:</span><span class="nc">TechnicalDocument</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">tech_doc</span><span class="err">.</span><span class="py">content</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">distributed</span><span class="w"> </span><span class="py">systems</span><span class="w"> </span><span class="py">architecture</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">tech_doc</span><span class="err">.</span><span class="py">title</span><span class="p">,</span><span class="w"> </span><span class="py">tech_doc</span><span class="err">.</span><span class="py">complexity_score</span><span class="w"> </span></span></span></code></pre></div><p><strong>Optimization</strong>:</p> <ul> <li>Higher IDF costs for specialized terms</li> <li>Vocabulary density &gt; 100 terms/doc</li> <li>20% cost increase for complex vocabularies</li> </ul> <hr> <p><strong>News Articles</strong> (moderate vocabulary):</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">General</span><span class="w"> </span><span class="py">news</span><span class="w"> </span><span class="py">content</span><span class="p">,</span><span class="w"> </span><span class="py">varied</span><span class="w"> </span><span class="py">length</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">news</span><span class="p">:</span><span class="nc">NewsArticle</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">news</span><span class="err">.</span><span class="py">headline</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">economic</span><span class="w"> </span><span class="py">policy</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">news</span><span class="err">.</span><span class="py">headline</span><span class="p">,</span><span class="w"> </span><span class="py">news</span><span class="err">.</span><span class="py">publication_date</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">news</span><span class="err">.</span><span class="py">relevance</span><span class="w"> </span><span class="py">DESC</span><span class="w"> </span></span></span></code></pre></div><p><strong>Optimization</strong>:</p> <ul> <li>Balanced length normalization</li> <li>Standard BM25 parameters (k1=1.2, b=0.75)</li> <li>Moderate vocabulary density (20-100 terms/doc)</li> </ul> <hr> <p><strong>Social Media Posts</strong> (low vocabulary, short):</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Short</span><span class="err">-</span><span class="py">form</span><span class="w"> </span><span class="py">content</span><span class="p">,</span><span class="w"> </span><span class="py">simple</span><span class="w"> </span><span class="py">vocabulary</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">post</span><span class="p">:</span><span class="nc">SocialPost</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">post</span><span class="err">.</span><span class="py">text</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">climate</span><span class="w"> </span><span class="py">change</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">post</span><span class="err">.</span><span class="py">text</span><span class="p">,</span><span class="w"> </span><span class="py">post</span><span class="err">.</span><span class="py">engagement_score</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">post</span><span class="err">.</span><span class="py">timestamp</span><span class="w"> </span><span class="py">DESC</span><span class="w"> </span></span></span></code></pre></div><p><strong>Optimization</strong>:</p> <ul> <li>Reduced length penalty for short documents</li> <li>Lower IDF complexity</li> <li>Vocabulary density &lt; 20 terms/doc</li> <li>10% cost reduction</li> </ul> <h4 id="document-length-normalization" class="position-relative d-flex align-items-center group"> <span>Document Length Normalization</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="document-length-normalization" aria-haspopup="dialog" aria-label="Share link: Document Length Normalization"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-zig" data-lang="zig"><span class="line"><span class="cl"><span class="c1">// Automatic length factor adjustment </span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="kr">const</span><span class="w"> </span><span class="n">length_factor</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mf">1.0</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="p">(</span><span class="n">avg_document_length</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="mf">200.0</span><span class="p">)</span><span class="w"> </span><span class="o">/</span><span class="w"> </span><span class="mf">1000.0</span><span class="p">;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="kr">const</span><span class="w"> </span><span class="n">bounded_factor</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">@max</span><span class="p">(</span><span class="mf">0.8</span><span class="p">,</span><span class="w"> </span><span class="nb">@min</span><span class="p">(</span><span class="mf">1.5</span><span class="p">,</span><span class="w"> </span><span class="n">length_factor</span><span class="p">));</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="c1">// Examples: </span></span></span><span class="line"><span class="cl"><span class="c1">// 100-word docs: factor = 0.9 (easier to search) </span></span></span><span class="line"><span class="cl"><span class="c1">// 200-word docs: factor = 1.0 (baseline) </span></span></span><span class="line"><span class="cl"><span class="c1">// 1000-word docs: factor = 1.5 (harder to search) </span></span></span></code></pre></div> <h4 id="historical-performance-adaptation" class="position-relative d-flex align-items-center group"> <span>Historical Performance Adaptation</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="historical-performance-adaptation" aria-haspopup="dialog" aria-label="Share link: Historical Performance Adaptation"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-zig" data-lang="zig"><span class="line"><span class="cl"><span class="c1">// Learn from past queries </span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">search_queries</span><span class="w"> </span><span class="o">&gt;</span><span class="w"> </span><span class="mi">5</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="kr">const</span><span class="w"> </span><span class="n">performance_factor</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">hit_ratio</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="mf">0.4</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mf">0.6</span><span class="p">;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="c1">// hit_ratio = 0.9 → factor = 0.96 (reduce future costs) </span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="w"> </span><span class="c1">// hit_ratio = 0.5 → factor = 0.80 (increase caution) </span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="w"> </span><span class="c1">// hit_ratio = 0.1 → factor = 0.64 (significantly more expensive) </span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="w"> </span><span class="n">base_cost</span><span class="w"> </span><span class="o">*=</span><span class="w"> </span><span class="n">performance_factor</span><span class="p">;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="p">}</span><span class="w"> </span></span></span></code></pre></div> <h3 id="performance-characteristics" class="position-relative d-flex align-items-center group"> <span>Performance Characteristics</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="performance-characteristics" aria-haspopup="dialog" aria-label="Share link: Performance Characteristics"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3> <h4 id="bm25-vs-standard-full-text" class="position-relative d-flex align-items-center group"> <span>BM25 vs Standard Full-Text</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="bm25-vs-standard-full-text" aria-haspopup="dialog" aria-label="Share link: BM25 vs Standard Full-Text"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><table> <thead> <tr> <th>Metric</th> <th>Standard Full-Text</th> <th>BM25 Enhanced</th> <th>Improvement</th> </tr> </thead> <tbody> <tr> <td><strong>Base Cost</strong></td> <td>20.0</td> <td>25.0</td> <td>25% overhead for ranking</td> </tr> <tr> <td><strong>Query Complexity</strong></td> <td>20% per term</td> <td>30% per term</td> <td>Better multi-term accuracy</td> </tr> <tr> <td><strong>Corpus Scaling</strong></td> <td>Linear</td> <td>Logarithmic</td> <td>Better large-scale performance</td> </tr> <tr> <td><strong>Search Quality</strong></td> <td>Term matching</td> <td>Relevance ranking</td> <td>40-60% better results</td> </tr> <tr> <td><strong>Cost Accuracy</strong></td> <td>Heuristic</td> <td>Statistics-based</td> <td>25-35% more accurate</td> </tr> </tbody> </table> <h4 id="real-world-performance" class="position-relative d-flex align-items-center group"> <span>Real-World Performance</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="real-world-performance" aria-haspopup="dialog" aria-label="Share link: Real-World Performance"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><p><strong>Query Relevance</strong>:</p> <ul> <li><strong>40-60% improvement</strong> in search result quality</li> <li>Automatic relevance sorting without explicit ORDER BY</li> <li>Context-aware scoring considers document characteristics</li> </ul> <p><strong>Cost Estimation Accuracy</strong>:</p> <ul> <li><strong>25-35% more accurate</strong> cost estimation for complex queries</li> <li>Adaptive optimization based on corpus characteristics</li> <li>Historical performance integration for continuous improvement</li> </ul> <p><strong>Enterprise Scalability</strong>:</p> <ul> <li><strong>Logarithmic scaling</strong> with corpus size (vs linear for basic full-text)</li> <li>Tested with <strong>100,000+ documents</strong> maintaining sub-second response times</li> <li>Vocabulary density adaptation for specialized domains</li> </ul> <h4 id="benchmarks" class="position-relative d-flex align-items-center group"> <span>Benchmarks</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="benchmarks" aria-haspopup="dialog" aria-label="Share link: Benchmarks"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-plaintext" data-lang="plaintext"><span class="line"><span class="cl">Corpus Size: 100,000 documents </span></span><span class="line"><span class="cl">Average Document Length: 500 words </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl">Single-term query: </span></span><span class="line"><span class="cl"> - Standard full-text: 45ms </span></span><span class="line"><span class="cl"> - BM25 ranking: 52ms (+15% for relevance scoring) </span></span><span class="line"><span class="cl"> - Result quality: +55% precision </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl">Multi-term query (3 terms): </span></span><span class="line"><span class="cl"> - Standard full-text: 120ms </span></span><span class="line"><span class="cl"> - BM25 ranking: 135ms (+12% overhead) </span></span><span class="line"><span class="cl"> - Result quality: +48% precision </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl">Complex query (5+ terms): </span></span><span class="line"><span class="cl"> - Standard full-text: 280ms </span></span><span class="line"><span class="cl"> - BM25 ranking: 295ms (+5% overhead) </span></span><span class="line"><span class="cl"> - Result quality: +62% precision </span></span></code></pre></div> <h3 id="advanced-features" class="position-relative d-flex align-items-center group"> <span>Advanced Features</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="advanced-features" aria-haspopup="dialog" aria-label="Share link: Advanced Features"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3> <h4 id="custom-bm25-parameters" class="position-relative d-flex align-items-center group"> <span>Custom BM25 Parameters</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="custom-bm25-parameters" aria-haspopup="dialog" aria-label="Share link: Custom BM25 Parameters"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><p>While Geode uses standard BM25 parameters (k1=1.2, b=0.75), you can tune for specific use cases:</p> <p><strong>High Term Frequency Importance</strong> (k1 = 2.0):</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-yaml" data-lang="yaml"><span class="line"><span class="cl"><span class="c"># For technical documentation where repeated terms matter</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="nt">fulltext_indexes</span><span class="p">:</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">technical_docs</span><span class="p">:</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">k1</span><span class="p">:</span><span class="w"> </span><span class="m">2.0</span><span class="w"> </span><span class="c"># Emphasize term frequency</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">b</span><span class="p">:</span><span class="w"> </span><span class="m">0.75</span><span class="w"> </span></span></span></code></pre></div><p><strong>No Length Normalization</strong> (b = 0.0):</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-yaml" data-lang="yaml"><span class="line"><span class="cl"><span class="c"># For fixed-length documents (tweets, titles)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="nt">fulltext_indexes</span><span class="p">:</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">short_texts</span><span class="p">:</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">k1</span><span class="p">:</span><span class="w"> </span><span class="m">1.2</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">b</span><span class="p">:</span><span class="w"> </span><span class="m">0.0</span><span class="w"> </span><span class="c"># Disable length penalty</span><span class="w"> </span></span></span></code></pre></div><p><strong>Strong Length Penalty</strong> (b = 1.0):</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-yaml" data-lang="yaml"><span class="line"><span class="cl"><span class="c"># For variable-length documents where length matters</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="nt">fulltext_indexes</span><span class="p">:</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">mixed_content</span><span class="p">:</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">k1</span><span class="p">:</span><span class="w"> </span><span class="m">1.2</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">b</span><span class="p">:</span><span class="w"> </span><span class="m">1.0</span><span class="w"> </span><span class="c"># Full length normalization</span><span class="w"> </span></span></span></code></pre></div> <h4 id="field-boosting" class="position-relative d-flex align-items-center group"> <span>Field Boosting</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="field-boosting" aria-haspopup="dialog" aria-label="Share link: Field Boosting"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><p><strong>Weighted Multi-Field Search</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Title</span><span class="w"> </span><span class="py">matches</span><span class="w"> </span><span class="py">rank</span><span class="w"> </span><span class="py">3x</span><span class="w"> </span><span class="py">higher</span><span class="w"> </span><span class="py">than</span><span class="w"> </span><span class="py">content</span><span class="w"> </span><span class="py">matches</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">article</span><span class="p">:</span><span class="nc">Article</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">article</span><span class="err">.</span><span class="py">title</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">graph</span><span class="w"> </span><span class="py">database</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">OR</span><span class="w"> </span><span class="py">article</span><span class="err">.</span><span class="py">content</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">graph</span><span class="w"> </span><span class="py">database</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">article</span><span class="err">.</span><span class="py">title</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">bm25_score_weighted</span><span class="p">(</span><span class="py">article</span><span class="err">.</span><span class="py">title</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">graph</span><span class="w"> </span><span class="py">database</span><span class="err">&#39;</span><span class="p">,</span><span class="w"> </span><span class="py">3</span><span class="mf">.0</span><span class="p">)</span><span class="w"> </span><span class="err">+</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">bm25_score_weighted</span><span class="p">(</span><span class="py">article</span><span class="err">.</span><span class="py">content</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">graph</span><span class="w"> </span><span class="py">database</span><span class="err">&#39;</span><span class="p">,</span><span class="w"> </span><span class="py">1</span><span class="mf">.0</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">score</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">score</span><span class="w"> </span><span class="py">DESC</span><span class="w"> </span></span></span></code></pre></div> <h4 id="synonym-expansion" class="position-relative d-flex align-items-center group"> <span>Synonym Expansion</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="synonym-expansion" aria-haspopup="dialog" aria-label="Share link: Synonym Expansion"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-yaml" data-lang="yaml"><span class="line"><span class="cl"><span class="c"># config/fulltext.yaml</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="nt">analyzers</span><span class="p">:</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">with_synonyms</span><span class="p">:</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">tokenizer</span><span class="p">:</span><span class="w"> </span><span class="l">standard</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">filters</span><span class="p">:</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span>- <span class="l">lowercase</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span>- <span class="nt">synonyms</span><span class="p">:</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">database</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="s2">&#34;db&#34;</span><span class="p">,</span><span class="w"> </span><span class="s2">&#34;datastore&#34;</span><span class="p">,</span><span class="w"> </span><span class="s2">&#34;repository&#34;</span><span class="p">]</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">machine learning</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="s2">&#34;ml&#34;</span><span class="p">,</span><span class="w"> </span><span class="s2">&#34;artificial intelligence&#34;</span><span class="p">,</span><span class="w"> </span><span class="s2">&#34;ai&#34;</span><span class="p">]</span><span class="w"> </span></span></span></code></pre></div><p><strong>Query with Synonyms</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Automatically</span><span class="w"> </span><span class="py">expands</span><span class="w"> </span><span class="s">&#34;db&#34;</span><span class="w"> </span><span class="py">to</span><span class="w"> </span><span class="py">include</span><span class="w"> </span><span class="s">&#34;database&#34;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">doc</span><span class="p">:</span><span class="nc">Document</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">content</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">db</span><span class="w"> </span><span class="py">performance</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">title</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Matches</span><span class="p">:</span><span class="w"> </span><span class="s">&#34;database performance&#34;</span><span class="p">,</span><span class="w"> </span><span class="s">&#34;db performance&#34;</span><span class="p">,</span><span class="w"> </span><span class="s">&#34;datastore performance&#34;</span><span class="w"> </span></span></span></code></pre></div> <h3 id="integration-with-indexoptimizer" class="position-relative d-flex align-items-center group"> <span>Integration with IndexOptimizer</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="integration-with-indexoptimizer" aria-haspopup="dialog" aria-label="Share link: Integration with IndexOptimizer"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3> <h4 id="automatic-index-selection" class="position-relative d-flex align-items-center group"> <span>Automatic Index Selection</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="automatic-index-selection" aria-haspopup="dialog" aria-label="Share link: Automatic Index Selection"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Query</span><span class="w"> </span><span class="py">planner</span><span class="w"> </span><span class="py">automatically</span><span class="w"> </span><span class="py">chooses</span><span class="w"> </span><span class="py">best</span><span class="w"> </span><span class="py">strategy</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">EXPLAIN</span><span class="w"> </span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">article</span><span class="p">:</span><span class="nc">Article</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">article</span><span class="err">.</span><span class="py">content</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">machine</span><span class="w"> </span><span class="py">learning</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">article</span><span class="err">.</span><span class="py">title</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">article</span><span class="err">.</span><span class="py">relevance_score</span><span class="w"> </span><span class="py">DESC</span><span class="w"> </span></span></span></code></pre></div><p><strong>Execution Plan</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-json" data-lang="json"><span class="line"><span class="cl"><span class="p">{</span> </span></span><span class="line"><span class="cl"> <span class="nt">&#34;logical&#34;</span><span class="p">:</span> <span class="p">[</span> </span></span><span class="line"><span class="cl"> <span class="p">{</span><span class="nt">&#34;op&#34;</span><span class="p">:</span> <span class="s2">&#34;FullTextScan&#34;</span><span class="p">,</span> <span class="nt">&#34;index&#34;</span><span class="p">:</span> <span class="s2">&#34;article_content_idx&#34;</span><span class="p">,</span> <span class="nt">&#34;method&#34;</span><span class="p">:</span> <span class="s2">&#34;BM25&#34;</span><span class="p">},</span> </span></span><span class="line"><span class="cl"> <span class="p">{</span><span class="nt">&#34;op&#34;</span><span class="p">:</span> <span class="s2">&#34;Sort&#34;</span><span class="p">,</span> <span class="nt">&#34;key&#34;</span><span class="p">:</span> <span class="s2">&#34;relevance_score&#34;</span><span class="p">,</span> <span class="nt">&#34;order&#34;</span><span class="p">:</span> <span class="s2">&#34;DESC&#34;</span><span class="p">}</span> </span></span><span class="line"><span class="cl"> <span class="p">],</span> </span></span><span class="line"><span class="cl"> <span class="nt">&#34;properties&#34;</span><span class="p">:</span> <span class="p">{</span> </span></span><span class="line"><span class="cl"> <span class="nt">&#34;estimated_cost&#34;</span><span class="p">:</span> <span class="mf">32.5</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="nt">&#34;estimated_rows&#34;</span><span class="p">:</span> <span class="mi">150</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="nt">&#34;index_selectivity&#34;</span><span class="p">:</span> <span class="mf">0.15</span> </span></span><span class="line"><span class="cl"> <span class="p">}</span> </span></span><span class="line"><span class="cl"><span class="p">}</span> </span></span></code></pre></div><p><strong>Cost Comparison</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-plaintext" data-lang="plaintext"><span class="line"><span class="cl">Sequential Scan: 1000.0 (scan all 100K docs) </span></span><span class="line"><span class="cl">Basic Full-Text: 28.0 (term matching only) </span></span><span class="line"><span class="cl">BM25 Full-Text: 32.5 (relevance ranking) ✅ SELECTED </span></span></code></pre></div> <h4 id="query-plan-caching" class="position-relative d-flex align-items-center group"> <span>Query Plan Caching</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="query-plan-caching" aria-haspopup="dialog" aria-label="Share link: Query Plan Caching"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><p><strong>Cached BM25 Plans</strong>:</p> <ul> <li>Repeated queries use cached execution plans</li> <li>Parameters (k1, b) optimized for specific patterns</li> <li>LRU eviction for memory efficiency</li> <li>Cache warming for common queries</li> </ul> <p><strong>Example</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">First</span><span class="w"> </span><span class="py">execution</span><span class="p">:</span><span class="w"> </span><span class="nc">135ms</span><span class="w"> </span><span class="p">(</span><span class="py">plan</span><span class="w"> </span><span class="err">+</span><span class="w"> </span><span class="py">execute</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">doc</span><span class="p">:</span><span class="nc">Document</span><span class="p">)</span><span class="w"> </span><span class="py">WHERE</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">text</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">climate</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">title</span><span class="w"> </span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">relevance</span><span class="w"> </span><span class="py">DESC</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Subsequent</span><span class="w"> </span><span class="py">executions</span><span class="p">:</span><span class="w"> </span><span class="nc">52ms</span><span class="w"> </span><span class="p">(</span><span class="py">execute</span><span class="w"> </span><span class="kd">on</span><span class="py">ly</span><span class="p">,</span><span class="w"> </span><span class="py">plan</span><span class="w"> </span><span class="py">cached</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">doc</span><span class="p">:</span><span class="nc">Document</span><span class="p">)</span><span class="w"> </span><span class="py">WHERE</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">text</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">climate</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">title</span><span class="w"> </span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">relevance</span><span class="w"> </span><span class="py">DESC</span><span class="w"> </span></span></span></code></pre></div> <h3 id="use-cases" class="position-relative d-flex align-items-center group"> <span>Use Cases</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="use-cases" aria-haspopup="dialog" aria-label="Share link: Use Cases"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3> <h4 id="document-search" class="position-relative d-flex align-items-center group"> <span>Document Search</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="document-search" aria-haspopup="dialog" aria-label="Share link: Document Search"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><p><strong>Enterprise Document Management</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="py">CREATE</span><span class="w"> </span><span class="py">INDEX</span><span class="w"> </span><span class="py">document_content_idx</span><span class="w"> </span><span class="py">ON</span><span class="w"> </span><span class="py">Document</span><span class="w"> </span><span class="p">(</span><span class="py">title</span><span class="p">,</span><span class="w"> </span><span class="py">content</span><span class="p">)</span><span class="w"> </span><span class="py">USING</span><span class="w"> </span><span class="py">fulltext</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Search</span><span class="w"> </span><span class="py">across</span><span class="w"> </span><span class="py">1M</span><span class="err">+</span><span class="w"> </span><span class="py">documents</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">doc</span><span class="p">:</span><span class="nc">Document</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">content</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">quarterly</span><span class="w"> </span><span class="py">earnings</span><span class="w"> </span><span class="py">report</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">created_date</span><span class="w"> </span><span class="err">&gt;</span><span class="w"> </span><span class="py">datetime</span><span class="p">(</span><span class="err">&#39;</span><span class="py">2025</span><span class="err">-</span><span class="py">01</span><span class="err">-</span><span class="py">01</span><span class="err">&#39;</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">title</span><span class="p">,</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">author</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">bm25_score</span><span class="p">(</span><span class="py">doc</span><span class="err">.</span><span class="py">content</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">quarterly</span><span class="w"> </span><span class="py">earnings</span><span class="w"> </span><span class="py">report</span><span class="err">&#39;</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">relevance</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">relevance</span><span class="w"> </span><span class="py">DESC</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">LIMIT</span><span class="w"> </span><span class="py">20</span><span class="w"> </span></span></span></code></pre></div> <h4 id="e-commerce-product-search" class="position-relative d-flex align-items-center group"> <span>E-commerce Product Search</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="e-commerce-product-search" aria-haspopup="dialog" aria-label="Share link: E-commerce Product Search"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><p><strong>Product Catalog Search</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="py">CREATE</span><span class="w"> </span><span class="py">INDEX</span><span class="w"> </span><span class="py">product_search_idx</span><span class="w"> </span><span class="py">ON</span><span class="w"> </span><span class="py">Product</span><span class="w"> </span><span class="p">(</span><span class="py">name</span><span class="p">,</span><span class="w"> </span><span class="py">description</span><span class="p">,</span><span class="w"> </span><span class="py">tags</span><span class="p">)</span><span class="w"> </span><span class="py">USING</span><span class="w"> </span><span class="py">fulltext</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Search</span><span class="w"> </span><span class="py">with</span><span class="w"> </span><span class="py">relevance</span><span class="w"> </span><span class="py">ranking</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Product</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">description</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">wireless</span><span class="w"> </span><span class="py">bluetooth</span><span class="w"> </span><span class="py">headphones</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">price</span><span class="w"> </span><span class="err">&lt;</span><span class="p">=</span><span class="w"> </span><span class="py">150</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">in_stock</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">true</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">name</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">price</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">rating</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">bm25_score</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">description</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">wireless</span><span class="w"> </span><span class="py">bluetooth</span><span class="w"> </span><span class="py">headphones</span><span class="err">&#39;</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">match_score</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">match_score</span><span class="w"> </span><span class="py">DESC</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">rating</span><span class="w"> </span><span class="py">DESC</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">LIMIT</span><span class="w"> </span><span class="py">50</span><span class="w"> </span></span></span></code></pre></div> <h4 id="knowledge-base-search" class="position-relative d-flex align-items-center group"> <span>Knowledge Base Search</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="knowledge-base-search" aria-haspopup="dialog" aria-label="Share link: Knowledge Base Search"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><p><strong>Technical Documentation</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="py">CREATE</span><span class="w"> </span><span class="py">INDEX</span><span class="w"> </span><span class="py">kb_article_idx</span><span class="w"> </span><span class="py">ON</span><span class="w"> </span><span class="py">KBArticle</span><span class="w"> </span><span class="p">(</span><span class="py">title</span><span class="p">,</span><span class="w"> </span><span class="py">content</span><span class="p">,</span><span class="w"> </span><span class="py">tags</span><span class="p">)</span><span class="w"> </span><span class="py">USING</span><span class="w"> </span><span class="py">fulltext</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Find</span><span class="w"> </span><span class="py">relevant</span><span class="w"> </span><span class="py">help</span><span class="w"> </span><span class="py">articles</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">article</span><span class="p">:</span><span class="nc">KBArticle</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">article</span><span class="err">.</span><span class="py">content</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">password</span><span class="w"> </span><span class="py">reset</span><span class="w"> </span><span class="py">authentication</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="py">article</span><span class="err">.</span><span class="py">status</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="err">&#39;</span><span class="py">published</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">article</span><span class="err">.</span><span class="py">title</span><span class="p">,</span><span class="w"> </span><span class="py">article</span><span class="err">.</span><span class="py">category</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">bm25_score</span><span class="p">(</span><span class="py">article</span><span class="err">.</span><span class="py">content</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">password</span><span class="w"> </span><span class="py">reset</span><span class="w"> </span><span class="py">authentication</span><span class="err">&#39;</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">relevance</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">article</span><span class="err">.</span><span class="py">helpful_votes</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">relevance</span><span class="w"> </span><span class="py">DESC</span><span class="p">,</span><span class="w"> </span><span class="py">article</span><span class="err">.</span><span class="py">helpful_votes</span><span class="w"> </span><span class="py">DESC</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">LIMIT</span><span class="w"> </span><span class="py">10</span><span class="w"> </span></span></span></code></pre></div> <h3 id="testing--validation" class="position-relative d-flex align-items-center group"> <span>Testing &amp;amp; Validation</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="testing--validation" aria-haspopup="dialog" aria-label="Share link: Testing &amp;amp; Validation"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3> <h4 id="unit-tests" class="position-relative d-flex align-items-center group"> <span>Unit Tests</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="unit-tests" aria-haspopup="dialog" aria-label="Share link: Unit Tests"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><p>Comprehensive test coverage validates BM25 implementation:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-bash" data-lang="bash"><span class="line"><span class="cl"><span class="c1"># Run BM25 tests</span> </span></span><span class="line"><span class="cl">zig <span class="nb">test</span> tests/test_bm25_index_optimizer.zig </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl"><span class="c1"># Integration tests</span> </span></span><span class="line"><span class="cl">zig <span class="nb">test</span> tests/integration_bm25_optimizer.zig </span></span></code></pre></div><p><strong>Test Scenarios</strong>:</p> <ul> <li>✅ Mathematical model validation (k1, b parameters)</li> <li>✅ Cost estimation accuracy</li> <li>✅ Statistics integration</li> <li>✅ Large-scale corpus testing (100K+ documents)</li> <li>✅ Performance characteristics validation</li> </ul> <h4 id="query-testing" class="position-relative d-flex align-items-center group"> <span>Query Testing</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="query-testing" aria-haspopup="dialog" aria-label="Share link: Query Testing"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><p><strong>Relevance Testing</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Create</span><span class="w"> </span><span class="py">test</span><span class="w"> </span><span class="py">corpus</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">CREATE</span><span class="w"> </span><span class="p">(</span><span class="py">doc1</span><span class="p">:</span><span class="nc">TestDoc</span><span class="w"> </span><span class="p">{</span><span class="py">text</span><span class="p">:</span><span class="w"> </span><span class="err">&#39;</span><span class="nc">machine</span><span class="w"> </span><span class="py">learning</span><span class="w"> </span><span class="py">algorithms</span><span class="w"> </span><span class="py">for</span><span class="w"> </span><span class="py">classification</span><span class="err">&#39;</span><span class="p">})</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">CREATE</span><span class="w"> </span><span class="p">(</span><span class="py">doc2</span><span class="p">:</span><span class="nc">TestDoc</span><span class="w"> </span><span class="p">{</span><span class="py">text</span><span class="p">:</span><span class="w"> </span><span class="err">&#39;</span><span class="nc">introduction</span><span class="w"> </span><span class="py">to</span><span class="w"> </span><span class="py">machine</span><span class="w"> </span><span class="py">learning</span><span class="err">&#39;</span><span class="p">})</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">CREATE</span><span class="w"> </span><span class="p">(</span><span class="py">doc3</span><span class="p">:</span><span class="nc">TestDoc</span><span class="w"> </span><span class="p">{</span><span class="py">text</span><span class="p">:</span><span class="w"> </span><span class="err">&#39;</span><span class="nc">deep</span><span class="w"> </span><span class="py">learning</span><span class="w"> </span><span class="py">neural</span><span class="w"> </span><span class="py">networks</span><span class="err">&#39;</span><span class="p">})</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">CREATE</span><span class="w"> </span><span class="p">(</span><span class="py">doc4</span><span class="p">:</span><span class="nc">TestDoc</span><span class="w"> </span><span class="p">{</span><span class="py">text</span><span class="p">:</span><span class="w"> </span><span class="err">&#39;</span><span class="nc">machine</span><span class="w"> </span><span class="py">learning</span><span class="w"> </span><span class="py">machine</span><span class="w"> </span><span class="py">learning</span><span class="w"> </span><span class="py">machine</span><span class="w"> </span><span class="py">learning</span><span class="err">&#39;</span><span class="p">})</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Search</span><span class="w"> </span><span class="py">and</span><span class="w"> </span><span class="py">verify</span><span class="w"> </span><span class="py">BM25</span><span class="w"> </span><span class="py">scoring</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">doc</span><span class="p">:</span><span class="nc">TestDoc</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">text</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">machine</span><span class="w"> </span><span class="py">learning</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">text</span><span class="p">,</span><span class="w"> </span><span class="py">bm25_score</span><span class="p">(</span><span class="py">doc</span><span class="err">.</span><span class="py">text</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">machine</span><span class="w"> </span><span class="py">learning</span><span class="err">&#39;</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">score</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">score</span><span class="w"> </span><span class="py">DESC</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Expected</span><span class="w"> </span><span class="py">order</span><span class="p">:</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="nc">1</span><span class="err">.</span><span class="w"> </span><span class="py">doc4</span><span class="w"> </span><span class="p">(</span><span class="py">high</span><span class="w"> </span><span class="py">term</span><span class="w"> </span><span class="py">frequency</span><span class="p">,</span><span class="w"> </span><span class="py">but</span><span class="w"> </span><span class="py">length</span><span class="w"> </span><span class="py">penalty</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">2</span><span class="err">.</span><span class="w"> </span><span class="py">doc1</span><span class="w"> </span><span class="p">(</span><span class="py">good</span><span class="w"> </span><span class="py">term</span><span class="w"> </span><span class="py">frequency</span><span class="p">,</span><span class="w"> </span><span class="py">additional</span><span class="w"> </span><span class="py">context</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">3</span><span class="err">.</span><span class="w"> </span><span class="py">doc2</span><span class="w"> </span><span class="p">(</span><span class="py">exact</span><span class="w"> </span><span class="py">match</span><span class="w"> </span><span class="py">in</span><span class="w"> </span><span class="py">title</span><span class="err">-</span><span class="py">like</span><span class="w"> </span><span class="py">position</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">4</span><span class="err">.</span><span class="w"> </span><span class="py">doc3</span><span class="w"> </span><span class="p">(</span><span class="py">related</span><span class="w"> </span><span class="py">but</span><span class="w"> </span><span class="py">no</span><span class="w"> </span><span class="py">exact</span><span class="w"> </span><span class="py">match</span><span class="p">)</span><span class="w"> </span></span></span></code></pre></div> <h3 id="troubleshooting" class="position-relative d-flex align-items-center group"> <span>Troubleshooting</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="troubleshooting" aria-haspopup="dialog" aria-label="Share link: Troubleshooting"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3> <h4 id="common-issues" class="position-relative d-flex align-items-center group"> <span>Common Issues</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="common-issues" aria-haspopup="dialog" aria-label="Share link: Common Issues"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><p><strong>Issue</strong>: BM25 scores seem incorrect</p> <p><strong>Diagnosis</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Check</span><span class="w"> </span><span class="py">corpus</span><span class="w"> </span><span class="py">statistics</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">EXPLAIN</span><span class="w"> </span><span class="py">ANALYZE</span><span class="w"> </span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">doc</span><span class="p">:</span><span class="nc">Document</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">content</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">test</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">count</span><span class="p">(</span><span class="py">doc</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Verify</span><span class="w"> </span><span class="py">index</span><span class="w"> </span><span class="py">statistics</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">CALL</span><span class="w"> </span><span class="py">db</span><span class="err">.</span><span class="py">index</span><span class="err">.</span><span class="py">stats</span><span class="p">(</span><span class="err">&#39;</span><span class="py">document_content_idx</span><span class="err">&#39;</span><span class="p">)</span><span class="w"> </span></span></span></code></pre></div><p><strong>Solution</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-bash" data-lang="bash"><span class="line"><span class="cl"><span class="c1"># Rebuild index statistics</span> </span></span><span class="line"><span class="cl">geode query <span class="s2">&#34;CALL db.index.rebuild(&#39;document_content_idx&#39;)&#34;</span> --insecure </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl"><span class="c1"># Verify vocabulary size and document count</span> </span></span><span class="line"><span class="cl">geode query <span class="s2">&#34;CALL db.index.analyze(&#39;document_content_idx&#39;)&#34;</span> --insecure </span></span></code></pre></div><hr> <p><strong>Issue</strong>: Slow full-text queries</p> <p><strong>Diagnosis</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="py">PROFILE</span><span class="w"> </span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">doc</span><span class="p">:</span><span class="nc">Document</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">content</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">slow</span><span class="w"> </span><span class="kd">query</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="nc">RETURN</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">title</span><span class="w"> </span></span></span></code></pre></div><p><strong>Solution</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Add</span><span class="w"> </span><span class="py">index</span><span class="w"> </span><span class="py">if</span><span class="w"> </span><span class="py">missing</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">CREATE</span><span class="w"> </span><span class="py">INDEX</span><span class="w"> </span><span class="py">document_content_idx</span><span class="w"> </span><span class="py">ON</span><span class="w"> </span><span class="py">Document</span><span class="w"> </span><span class="p">(</span><span class="py">content</span><span class="p">)</span><span class="w"> </span><span class="py">USING</span><span class="w"> </span><span class="py">fulltext</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Optimize</span><span class="w"> </span><span class="kd">query</span><span class="w"> </span><span class="p">(</span><span class="nc">reduce</span><span class="w"> </span><span class="py">search</span><span class="w"> </span><span class="py">space</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">doc</span><span class="p">:</span><span class="nc">Document</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">created_date</span><span class="w"> </span><span class="err">&gt;</span><span class="w"> </span><span class="py">datetime</span><span class="p">(</span><span class="err">&#39;</span><span class="py">2025</span><span class="err">-</span><span class="py">01</span><span class="err">-</span><span class="py">01</span><span class="err">&#39;</span><span class="p">)</span><span class="w"> </span><span class="err">--</span><span class="w"> </span><span class="py">Filter</span><span class="w"> </span><span class="py">first</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">content</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">slow</span><span class="w"> </span><span class="kd">query</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="nc">RETURN</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">title</span><span class="w"> </span></span></span></code></pre></div><hr> <p><strong>Issue</strong>: Unexpected ranking order</p> <p><strong>Analysis</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Show</span><span class="w"> </span><span class="py">BM25</span><span class="w"> </span><span class="py">components</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">doc</span><span class="p">:</span><span class="nc">Document</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">content</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">unexpected</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">title</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">term_frequency</span><span class="p">(</span><span class="py">doc</span><span class="err">.</span><span class="py">content</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">unexpected</span><span class="err">&#39;</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">tf</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">document_frequency</span><span class="p">(</span><span class="err">&#39;</span><span class="py">unexpected</span><span class="err">&#39;</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">df</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">character_count</span><span class="p">(</span><span class="py">doc</span><span class="err">.</span><span class="py">content</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">doc_length</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">bm25_score</span><span class="p">(</span><span class="py">doc</span><span class="err">.</span><span class="py">content</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">unexpected</span><span class="err">&#39;</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">score</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">score</span><span class="w"> </span><span class="py">DESC</span><span class="w"> </span></span></span></code></pre></div><p><strong>Common Causes</strong>:</p> <ul> <li>Document length differences (short docs rank higher with b=0.75)</li> <li>Term saturation (diminishing returns after k1=1.2 threshold)</li> <li>IDF effects (rare terms dominate common terms)</li> </ul> <h3 id="best-practices" class="position-relative d-flex align-items-center group"> <span>Best Practices</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="best-practices" aria-haspopup="dialog" aria-label="Share link: Best Practices"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3> <h4 id="index-design" class="position-relative d-flex align-items-center group"> <span>Index Design</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="index-design" aria-haspopup="dialog" aria-label="Share link: Index Design"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><ol> <li> <p><strong>Index Appropriate Fields</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="err">✅</span><span class="w"> </span><span class="py">Good</span><span class="p">:</span><span class="w"> </span><span class="nc">Index</span><span class="w"> </span><span class="py">text</span><span class="w"> </span><span class="py">fields</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">CREATE</span><span class="w"> </span><span class="py">INDEX</span><span class="w"> </span><span class="py">article_idx</span><span class="w"> </span><span class="py">ON</span><span class="w"> </span><span class="py">Article</span><span class="w"> </span><span class="p">(</span><span class="py">content</span><span class="p">)</span><span class="w"> </span><span class="py">USING</span><span class="w"> </span><span class="py">fulltext</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="err">❌</span><span class="w"> </span><span class="py">Bad</span><span class="p">:</span><span class="w"> </span><span class="nc">Indexing</span><span class="w"> </span><span class="py">short</span><span class="w"> </span><span class="py">strings</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">CREATE</span><span class="w"> </span><span class="py">INDEX</span><span class="w"> </span><span class="py">tag_idx</span><span class="w"> </span><span class="py">ON</span><span class="w"> </span><span class="py">Tag</span><span class="w"> </span><span class="p">(</span><span class="py">name</span><span class="p">)</span><span class="w"> </span><span class="py">USING</span><span class="w"> </span><span class="py">fulltext</span><span class="w"> </span><span class="err">--</span><span class="w"> </span><span class="py">Use</span><span class="w"> </span><span class="py">standard</span><span class="w"> </span><span class="py">index</span><span class="w"> </span></span></span></code></pre></div></li> <li> <p><strong>Multi-Field Strategy</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Index</span><span class="w"> </span><span class="py">related</span><span class="w"> </span><span class="py">fields</span><span class="w"> </span><span class="py">together</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">CREATE</span><span class="w"> </span><span class="py">INDEX</span><span class="w"> </span><span class="py">article_search</span><span class="w"> </span><span class="py">ON</span><span class="w"> </span><span class="py">Article</span><span class="w"> </span><span class="p">(</span><span class="py">title</span><span class="p">,</span><span class="w"> </span><span class="py">abstract</span><span class="p">,</span><span class="w"> </span><span class="py">content</span><span class="p">)</span><span class="w"> </span><span class="py">USING</span><span class="w"> </span><span class="py">fulltext</span><span class="w"> </span></span></span></code></pre></div></li> <li> <p><strong>Avoid Over-Indexing</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Don</span><span class="err">&#39;</span><span class="py">t</span><span class="w"> </span><span class="py">index</span><span class="w"> </span><span class="py">every</span><span class="w"> </span><span class="py">text</span><span class="w"> </span><span class="py">field</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Focus</span><span class="w"> </span><span class="kd">on</span><span class="w"> </span><span class="py">frequently</span><span class="w"> </span><span class="py">searched</span><span class="w"> </span><span class="py">fields</span><span class="w"> </span></span></span></code></pre></div></li> </ol> <h4 id="query-optimization" class="position-relative d-flex align-items-center group"> <span>Query Optimization</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="query-optimization" aria-haspopup="dialog" aria-label="Share link: Query Optimization"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><ol> <li> <p><strong>Combine with Filters</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="err">✅</span><span class="w"> </span><span class="py">Good</span><span class="p">:</span><span class="w"> </span><span class="nc">Filter</span><span class="w"> </span><span class="py">then</span><span class="w"> </span><span class="py">search</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">doc</span><span class="p">:</span><span class="nc">Document</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">category</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="err">&#39;</span><span class="py">technical</span><span class="err">&#39;</span><span class="w"> </span><span class="err">--</span><span class="w"> </span><span class="py">Filter</span><span class="w"> </span><span class="py">first</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">content</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">optimization</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">title</span><span class="w"> </span></span></span></code></pre></div></li> <li> <p><strong>Use Appropriate Limits</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Always</span><span class="w"> </span><span class="py">limit</span><span class="w"> </span><span class="py">full</span><span class="err">-</span><span class="py">text</span><span class="w"> </span><span class="py">queries</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">doc</span><span class="p">:</span><span class="nc">Document</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">content</span><span class="w"> </span><span class="py">CONTAINS</span><span class="w"> </span><span class="err">&#39;</span><span class="py">search</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">title</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">bm25_score</span><span class="p">(</span><span class="py">doc</span><span class="err">.</span><span class="py">content</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">search</span><span class="err">&#39;</span><span class="p">)</span><span class="w"> </span><span class="py">DESC</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">LIMIT</span><span class="w"> </span><span class="py">100</span><span class="w"> </span><span class="err">--</span><span class="w"> </span><span class="err">✅</span><span class="w"> </span><span class="py">Good</span><span class="w"> </span></span></span></code></pre></div></li> <li> <p><strong>Leverage Scoring</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Use</span><span class="w"> </span><span class="py">BM25</span><span class="w"> </span><span class="py">scores</span><span class="w"> </span><span class="py">for</span><span class="w"> </span><span class="py">ranking</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">doc</span><span class="err">.</span><span class="py">title</span><span class="p">,</span><span class="w"> </span><span class="py">bm25_score</span><span class="p">(</span><span class="py">doc</span><span class="err">.</span><span class="py">content</span><span class="p">,</span><span class="w"> </span><span class="kd">query</span><span class="p">)</span><span class="w"> </span><span class="nc">AS</span><span class="w"> </span><span class="py">relevance</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">relevance</span><span class="w"> </span><span class="py">DESC</span><span class="w"> </span></span></span></code></pre></div></li> </ol> <h4 id="performance-tuning" class="position-relative d-flex align-items-center group"> <span>Performance Tuning</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="performance-tuning" aria-haspopup="dialog" aria-label="Share link: Performance Tuning"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><ol> <li> <p><strong>Monitor Statistics</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-bash" data-lang="bash"><span class="line"><span class="cl"><span class="c1"># Regular statistics updates</span> </span></span><span class="line"><span class="cl"><span class="m">0</span> <span class="m">2</span> * * * geode query <span class="s2">&#34;CALL db.index.analyze(&#39;*&#39;)&#34;</span> </span></span></code></pre></div></li> <li> <p><strong>Tune Parameters</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-yaml" data-lang="yaml"><span class="line"><span class="cl"><span class="c"># Adjust for your corpus</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="nt">fulltext_indexes</span><span class="p">:</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">default</span><span class="p">:</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">k1</span><span class="p">:</span><span class="w"> </span><span class="m">1.2</span><span class="w"> </span><span class="c"># Standard</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">b</span><span class="p">:</span><span class="w"> </span><span class="m">0.75</span><span class="w"> </span><span class="c"># Balanced length normalization</span><span class="w"> </span></span></span></code></pre></div></li> <li> <p><strong>Cache Frequently Used Plans</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-yaml" data-lang="yaml"><span class="line"><span class="cl"><span class="nt">query_cache</span><span class="p">:</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">max_plans</span><span class="p">:</span><span class="w"> </span><span class="m">1000</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">bm25_plan_ttl</span><span class="p">:</span><span class="w"> </span><span class="m">3600</span><span class="w"> </span><span class="c"># 1 hour</span><span class="w"> </span></span></span></code></pre></div></li> </ol> <h3 id="references" class="position-relative d-flex align-items-center group"> <span>References</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="references" aria-haspopup="dialog" aria-label="Share link: References"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3> <h4 id="academic-papers" class="position-relative d-flex align-items-center group"> <span>Academic Papers</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="academic-papers" aria-haspopup="dialog" aria-label="Share link: Academic Papers"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><ul> <li> <p><strong>Robertson &amp; Zaragoza (2009)</strong>: &ldquo;The Probabilistic Relevance Framework: BM25 and Beyond&rdquo;</p> <ul> <li>Foundation of modern BM25 implementations</li> </ul> </li> <li> <p><strong>Manning et al. (2008)</strong>: &ldquo;Introduction to Information Retrieval&rdquo;</p> <ul> <li>Comprehensive text on search algorithms</li> <li><a href="https://nlp.stanford.edu/IR-book/" aria-label="https://nlp.stanford.edu/IR-book/ – opens in new window" target="_blank" rel="noopener noreferrer" >https://nlp.stanford.edu/IR-book/ <span aria-hidden="true" class="external-icon">↗</span> </a> </li> </ul> </li> </ul> <h4 id="standards--implementations" class="position-relative d-flex align-items-center group"> <span>Standards &amp;amp; Implementations</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="standards--implementations" aria-haspopup="dialog" aria-label="Share link: Standards &amp;amp; Implementations"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><ul> <li> <p><strong>Apache Lucene</strong>: Reference BM25 implementation</p> <ul> <li><a href="https://lucene.apache.org/" aria-label="https://lucene.apache.org/ – opens in new window" target="_blank" rel="noopener noreferrer" >https://lucene.apache.org/ <span aria-hidden="true" class="external-icon">↗</span> </a> </li> </ul> </li> <li> <p><strong>Elasticsearch BM25</strong>: Production-proven search engine</p> <ul> <li><a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/index-modules-similarity.html" aria-label="https://www.elastic.co/guide/en/elasticsearch/reference/current/index-modules-similarity.html – opens in new window" target="_blank" rel="noopener noreferrer" >https://www.elastic.co/guide/en/elasticsearch/reference/current/index-modules-similarity.html <span aria-hidden="true" class="external-icon">↗</span> </a> </li> </ul> </li> </ul> <h4 id="code-location" class="position-relative d-flex align-items-center group"> <span>Code Location</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="code-location" aria-haspopup="dialog" aria-label="Share link: Code Location"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><ul> <li><strong>Implementation</strong>: <code>src/server/index_optimizer.zig</code></li> <li><strong>Tests</strong>: <code>tests/test_bm25_index_optimizer.zig</code></li> <li><strong>Integration</strong>: <code>tests/integration_bm25_optimizer.zig</code></li> <li><strong>Documentation</strong>: <code>docs/BM25_INDEX_OPTIMIZER_INTEGRATION.md</code></li> </ul> <h3 id="next-steps" class="position-relative d-flex align-items-center group"> <span>Next Steps</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="next-steps" aria-haspopup="dialog" aria-label="Share link: Next Steps"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3><p><strong>For New Users</strong>:</p> <ul> <li><a href="/docs/query/indexing-and-optimization/" >Indexing Guide</a> - Full indexing overview</li> <li><a href="/docs/query/performance-tuning/" >Query Performance Tuning</a> - Optimization strategies</li> <li><a href="/docs/gql/guide/" >GQL Guide</a> - Complete query language reference</li> </ul> <p><strong>For Advanced Users</strong>:</p> <ul> <li><a href="/docs/query/materialized-views/" >Materialized Views</a> - Pre-computed search results</li> <li><a href="/docs/query/performance-tuning/" >Query Optimization</a> - EXPLAIN and PROFILE analysis</li> <li><a href="/docs/gql/advanced-patterns/" >Advanced GQL Patterns</a> - Complex search patterns</li> </ul> <p><strong>For Administrators</strong>:</p> <ul> <li><a href="/docs/query/performance-tuning/" >Performance Tuning</a> - System optimization</li> <li><a href="/docs/ops/observability/" >Monitoring</a> - Search performance tracking</li> <li><a href="/docs/architecture/performance-and-scaling/" >Scaling</a> - Large-scale deployments</li> </ul> <hr> <p><strong>Document Version</strong>: 1.0 <strong>Last Updated</strong>: January 24, 2026 <strong>Status</strong>: Production Ready <strong>Test Coverage</strong>: 10 comprehensive tests (6 unit + 4 integration) <strong>Performance</strong>: 40-60% search quality improvement, sub-second queries on 100K+ documents</p>