<!-- CANARY: REQ=REQ-DOCS-001; FEATURE="Docs"; ASPECT=Documentation; STATUS=TESTED; OWNER=docs; UPDATED=2026-01-15 -->
<h2 id="vector-similarity-search-in-geode" class="position-relative d-flex align-items-center group">
<span>Vector Similarity Search in Geode</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="vector-similarity-search-in-geode"
aria-haspopup="dialog"
aria-label="Share link: Vector Similarity Search in Geode">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h2><div id="headingShareModal" class="heading-share-modal" role="dialog" aria-modal="true" aria-labelledby="headingShareTitle" hidden>
<!--
  Shared "Share this section" dialog. It starts hidden; the inline script below
  removes the hidden attribute when a .h-share heading button is clicked, fills
  #headingShareInput with the section permalink and sets the href of the three
  share links. Icons are decorative (every control has its own text or
  aria-label), so each one is marked aria-hidden like the other icons on the page.
-->
<div class="hsm-dialog" role="document">
<div class="hsm-header">
<h2 id="headingShareTitle" class="h6 mb-0 fw-bold">Share this section</h2>
<button type="button" class="hsm-close" aria-label="Close">
<i class="fa-solid fa-xmark" aria-hidden="true"></i>
</button>
</div>
<div class="hsm-body">
<label for="headingShareInput" class="form-label small text-muted mb-1 text-uppercase fw-bold" style="font-size: 0.7rem; letter-spacing: 0.5px;">Permalink</label>
<div class="input-group mb-4 hsm-url-group">
<input id="headingShareInput" type="text" class="form-control font-monospace" readonly aria-readonly="true" style="font-size: 0.85rem;">
<button class="btn btn-primary hsm-copy" type="button" aria-label="Copy" title="Copy">
<i class="fa-duotone fa-clipboard" aria-hidden="true"></i>
</button>
</div>
<div class="small fw-bold mb-2 text-muted text-uppercase" style="font-size: 0.7rem; letter-spacing: 0.5px;">Share via</div>
<div class="hsm-share-grid">
<!-- The href of each link is assigned by the script every time the dialog opens. -->
<a id="share-twitter" class="btn btn-outline-secondary w-100" target="_blank" rel="noopener noreferrer">
<i class="fa-brands fa-twitter me-2" aria-hidden="true"></i>Twitter
</a>
<a id="share-linkedin" class="btn btn-outline-secondary w-100" target="_blank" rel="noopener noreferrer">
<i class="fa-brands fa-linkedin me-2" aria-hidden="true"></i>LinkedIn
</a>
<a id="share-facebook" class="btn btn-outline-secondary w-100" target="_blank" rel="noopener noreferrer">
<i class="fa-brands fa-facebook me-2" aria-hidden="true"></i>Facebook
</a>
</div>
</div>
</div>
</div>
<style>
/* Full-viewport backdrop that centres the share dialog. */
.heading-share-modal {
  position: fixed;
  inset: 0;
  display: flex;
  justify-content: center;
  align-items: center;
  background: rgba(0, 0, 0, 0.6);
  z-index: 1050;
  padding: 1rem;
  backdrop-filter: blur(4px);
  -webkit-backdrop-filter: blur(4px);
}
/* display:flex above would otherwise override the UA rule for [hidden]. */
.heading-share-modal[hidden] { display: none !important; }

/* Dialog card. */
.hsm-dialog {
  max-width: 420px;
  width: 100%;
  background: var(--bs-body-bg, #fff);
  color: var(--bs-body-color, #212529);
  border: 1px solid var(--bs-border-color, rgba(0,0,0,0.1));
  border-radius: 1rem;
  box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.25);
  overflow: hidden;
  animation: hsm-fade-in 0.2s ease-out;
}
@keyframes hsm-fade-in {
  from { opacity: 0; transform: scale(0.95); }
  to { opacity: 1; transform: scale(1); }
}
[data-bs-theme="dark"] .hsm-dialog {
  background: #1e293b;
  border-color: rgba(255,255,255,0.1);
  color: #f8f9fa;
}

/* Title bar. */
.hsm-header {
  display: flex;
  justify-content: space-between;
  align-items: center;
  padding: 1rem 1.5rem;
  border-bottom: 1px solid var(--bs-border-color, rgba(0,0,0,0.1));
  background: rgba(0,0,0,0.02);
}
[data-bs-theme="dark"] .hsm-header {
  background: rgba(255,255,255,0.02);
  border-color: rgba(255,255,255,0.1);
}

/* Close button: dimmed at rest, full opacity when pointed at or keyboard-focused. */
.hsm-close {
  background: transparent;
  border: none;
  color: inherit;
  opacity: 0.5;
  padding: 0.25rem 0.5rem;
  border-radius: 0.25rem;
  font-size: 1.2rem;
  line-height: 1;
  transition: opacity 0.2s;
}
.hsm-close:hover,
.hsm-close:focus-visible {
  opacity: 1;
}

.hsm-body {
  padding: 1.5rem;
}

/* Permalink field and copy button rendered as one joined control. */
.hsm-url-group {
  display: flex !important;
  align-items: stretch;
}
.hsm-url-group .form-control {
  flex: 1;
  min-width: 0;
  margin: 0;
  background: var(--bs-secondary-bg, #f8f9fa);
  border-color: var(--bs-border-color, #dee2e6);
  border-top-right-radius: 0;
  border-bottom-right-radius: 0;
  height: 42px;
}
.hsm-url-group .btn {
  flex: 0 0 auto;
  margin: 0;
  margin-left: -1px; /* overlap the input's right border so the seam is 1px wide */
  border-top-left-radius: 0;
  border-bottom-left-radius: 0;
  height: 42px;
  display: flex;
  align-items: center;
  justify-content: center;
  padding: 0 1.25rem;
  z-index: 2;
}
[data-bs-theme="dark"] .hsm-url-group .form-control {
  background: #0f172a;
  border-color: #334155;
  color: #e2e8f0;
}

/* Vertical stack of "share via" links. */
.hsm-share-grid {
  display: flex;
  flex-direction: column;
  gap: 0.5rem;
}
.hsm-share-grid .btn {
  display: flex;
  align-items: center;
  justify-content: center;
  font-size: 0.9rem;
  padding: 0.6rem;
  border-color: var(--bs-border-color);
  width: 100%;
}
[data-bs-theme="dark"] .hsm-share-grid .btn {
  color: #e2e8f0;
  border-color: #475569;
}
[data-bs-theme="dark"] .hsm-share-grid .btn:hover {
  background: #334155;
  border-color: #cbd5e1;
}

/* Honour the OS-level "reduce motion" setting: no scale/fade entrance, no opacity tween. */
@media (prefers-reduced-motion: reduce) {
  .hsm-dialog { animation: none; }
  .hsm-close { transition: none; }
}
</style>
<script>
(function(){
  // Controller for the heading "share" dialog (#headingShareModal).
  // Clicking a .h-share button opens the dialog pre-filled with a permalink to
  // the heading named by the button's data-share-target attribute; the dialog
  // can copy that link or hand it to Twitter / LinkedIn / Facebook share URLs.
  const modal = document.getElementById('headingShareModal');
  if(!modal) return;
  const input = modal.querySelector('#headingShareInput');
  // Opening, hydrating and copying all dereference the permalink field, so
  // without it there is nothing useful this script can do.
  if(!input) return;
  const copyBtn = modal.querySelector('.hsm-copy');
  const twitter = modal.querySelector('#share-twitter');
  const linkedin = modal.querySelector('#share-linkedin');
  const facebook = modal.querySelector('#share-facebook');
  let lastFocus = null;      // element focused before the dialog opened
  let trapBound = false;     // the Tab-trap listener is attached only once
  let feedbackTimer = null;  // pending "restore copy icon" timeout

  // Permalink for a heading id: current origin + path, with the id as fragment.
  function buildUrl(id){ return window.location.origin + window.location.pathname + '#' + id; }

  function isOpen(){ return !modal.hasAttribute('hidden'); }

  // Fill the permalink field and point each share link at the given heading.
  function hydrate(id){
    const url = buildUrl(id);
    input.value = url;
    const enc = encodeURIComponent(url);
    const text = encodeURIComponent(document.title);
    if(twitter) twitter.href = `https://twitter.com/intent/tweet?url=${enc}&text=${text}`;
    if(linkedin) linkedin.href = `https://www.linkedin.com/sharing/share-offsite/?url=${enc}`;
    if(facebook) facebook.href = `https://www.facebook.com/sharer/sharer.php?u=${enc}`;
  }

  // Show the dialog for heading `id`, remembering where focus came from.
  function openModal(id){
    lastFocus = document.activeElement;
    hydrate(id);
    if(!isOpen()){
      modal.removeAttribute('hidden');
    }
    // Focus on the next frame, after the hidden attribute has been removed.
    requestAnimationFrame(()=>{ input.focus(); });
    trapFocus();
  }

  // Hide the dialog and give focus back to whatever opened it.
  function closeModal(){
    if(!isOpen()) return;
    modal.setAttribute('hidden','');
    if(lastFocus && typeof lastFocus.focus === 'function') lastFocus.focus();
  }

  // Copy the permalink via the async Clipboard API, falling back to
  // execCommand when the API is missing (the property access throws) or rejects.
  function copyCurrent(){
    try{ navigator.clipboard.writeText(input.value).then(()=>feedback(true), ()=>fallback()); }
    catch(e){ fallback(); }
  }

  // Legacy copy path. execCommand reports failure by returning false rather
  // than throwing, so its return value decides which feedback icon is shown.
  function fallback(){
    input.select();
    let ok = false;
    try{ ok = document.execCommand('copy'); }
    catch(e){ ok = false; }
    feedback(ok);
  }

  // Swap the copy button's icon to a success/failure glyph for 1.8 s.
  function feedback(ok){
    if(!copyBtn) return;
    const icon = copyBtn.querySelector('i');
    if(!icon) return;
    // Cache the resting icon classes once so repeated clicks never capture
    // the temporary feedback classes as the value to restore.
    if(!copyBtn.getAttribute('data-prev')) copyBtn.setAttribute('data-prev', icon.className);
    const prev = copyBtn.getAttribute('data-prev');
    icon.className = ok ? 'fa-duotone fa-clipboard-check' : 'fa-duotone fa-circle-exclamation';
    // Cancel any earlier restore so it cannot cut this feedback short.
    clearTimeout(feedbackTimer);
    feedbackTimer = setTimeout(()=>{ icon.className = prev; }, 1800);
  }

  // Open the dialog for the heading a given share button points at.
  function openFromButton(btn){
    const id = btn.getAttribute('data-share-target');
    if(id) openModal(id);
  }

  // Click handler for directly-bound share buttons (currentTarget is the button).
  function handleShareClick(e){
    e.preventDefault();
    openFromButton(e.currentTarget);
  }

  // Attach handleShareClick to every .h-share button not yet marked as bound.
  function bindShareButtons(){
    document.querySelectorAll('.h-share').forEach(btn=>{
      if(!btn.dataset.hShareBound){
        btn.addEventListener('click', handleShareClick);
        btn.dataset.hShareBound = '1';
      }
    });
  }

  bindShareButtons();
  if(document.readyState === 'loading'){
    document.addEventListener('DOMContentLoaded', bindShareButtons);
  } else {
    requestAnimationFrame(bindShareButtons);
  }

  // Capture-phase fallback for share buttons that were never bound (e.g. added
  // after the passes above). The button is resolved from e.target because in a
  // document-level listener e.currentTarget is the document, not the button.
  document.addEventListener('click', function(e){
    const shareBtn = e.target.closest && e.target.closest('.h-share');
    if(shareBtn && !shareBtn.dataset.hShareBound){
      e.preventDefault();
      openFromButton(shareBtn);
    }
  }, true);

  // Delegated clicks: backdrop, close button and copy button.
  document.addEventListener('click', e=>{
    if(e.target === modal) closeModal(); // click landed on the backdrop itself
    if(e.target.closest && e.target.closest('.hsm-close')){ e.preventDefault(); closeModal(); }
    if(copyBtn && (e.target === copyBtn || (e.target.closest && e.target.closest('.hsm-copy')))){
      e.preventDefault();
      copyCurrent();
    }
  });

  document.addEventListener('keydown', e=>{ if(e.key === 'Escape' && isOpen()) closeModal(); });

  // Keep Tab / Shift+Tab cycling inside the dialog while it is open.
  function trapFocus(){
    if(trapBound) return;
    trapBound = true;
    modal.addEventListener('keydown', f=>{
      if(f.key !== 'Tab' || !isOpen()) return;
      const focusable = [...modal.querySelectorAll('a[href],button,input,textarea,select,[tabindex]:not([tabindex="-1"])')]
        .filter(el=>!el.hasAttribute('disabled'));
      if(!focusable.length) return;
      const first = focusable[0];
      const last = focusable[focusable.length-1];
      if(f.shiftKey && document.activeElement === first){
        f.preventDefault();
        last.focus();
      } else if(!f.shiftKey && document.activeElement === last){
        f.preventDefault();
        first.focus();
      }
    });
  }
})();
</script><p>Vector similarity search is a powerful feature in Geode that enables efficient nearest-neighbor queries over high-dimensional vector embeddings stored directly in graph properties. This capability is essential for modern machine learning applications including semantic search, recommendation systems, image similarity, and retrieval-augmented generation (RAG) workloads.</p>
<h3 id="introduction-to-vector-search" class="position-relative d-flex align-items-center group">
<span>Introduction to Vector Search</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="introduction-to-vector-search"
aria-haspopup="dialog"
aria-label="Share link: Introduction to Vector Search">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3><p>Vector search addresses the challenge of finding similar items in high-dimensional space. Instead of exact matching, vector search uses distance metrics (cosine similarity, Euclidean distance, dot product) to find the k-nearest neighbors to a query vector. This technology powers applications like:</p>
<ul>
<li><strong>Semantic Search</strong>: Finding documents or content with similar meaning, not just matching keywords</li>
<li><strong>Recommendation Engines</strong>: Identifying items similar to user preferences</li>
<li><strong>Image and Video Search</strong>: Finding visually similar media by comparing embedding vectors</li>
<li><strong>Anomaly Detection</strong>: Identifying outliers by measuring distance from normal patterns</li>
<li><strong>Question Answering</strong>: Retrieving relevant context for large language models (LLMs)</li>
</ul>
<p>Traditional exact nearest-neighbor search has O(n) complexity, making it impractical for large datasets. Geode uses Hierarchical Navigable Small World (HNSW) graphs to achieve approximate nearest-neighbor (ANN) search with logarithmic complexity.</p>
<h3 id="geodes-vector-search-implementation" class="position-relative d-flex align-items-center group">
<span>Geode&rsquo;s Vector Search Implementation</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="geodes-vector-search-implementation"
aria-haspopup="dialog"
aria-label="Share link: Geode's Vector Search Implementation">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3><p>Geode implements vector search as native graph capabilities through several components:</p>
<h4 id="hnsw-index-integration" class="position-relative d-flex align-items-center group">
<span>HNSW Index Integration</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="hnsw-index-integration"
aria-haspopup="dialog"
aria-label="Share link: HNSW Index Integration">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>HNSW indexes are stored alongside graph data, allowing seamless integration of vector search with graph traversals. Properties containing vector data can be indexed using:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="py">CREATE</span><span class="w"> </span><span class="py">VECTOR</span><span class="w"> </span><span class="py">INDEX</span><span class="w"> </span><span class="py">product_embeddings</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ON</span><span class="w"> </span><span class="py">Product</span><span class="p">(</span><span class="py">embedding</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="p">(</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">metric</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="err">'</span><span class="py">cosine</span><span class="err">'</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">dimensions</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">768</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">ef_construction</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">200</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">m</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">16</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="p">)</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div><p><strong>Parameters explained</strong>:</p>
<ul>
<li><code>metric</code>: Distance function (cosine, euclidean, dot_product)</li>
<li><code>dimensions</code>: Vector dimensionality (must match your embeddings)</li>
<li><code>ef_construction</code>: Build-time accuracy parameter (higher = more accurate, slower build)</li>
<li><code>m</code>: Maximum connections per node (higher = better recall, more memory)</li>
</ul>
<h4 id="native-gql-vector-functions" class="position-relative d-flex align-items-center group">
<span>Native GQL Vector Functions</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="native-gql-vector-functions"
aria-haspopup="dialog"
aria-label="Share link: Native GQL Vector Functions">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Geode extends GQL with vector search functions that integrate naturally with pattern matching:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Product</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">vector_similarity</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">embedding</span><span class="p">,</span><span class="w"> </span><span class="nv">$query_vector</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">cosine</span><span class="err">'</span><span class="p">)</span><span class="w"> </span><span class="err">></span><span class="w"> </span><span class="py">0</span><span class="mf">.8</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">name</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">description</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">vector_similarity</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">embedding</span><span class="p">,</span><span class="w"> </span><span class="nv">$query_vector</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">cosine</span><span class="err">'</span><span class="p">)</span><span class="w"> </span><span class="py">DESC</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">LIMIT</span><span class="w"> </span><span class="py">10</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h4 id="hybrid-search-combining-graph-and-vector-queries" class="position-relative d-flex align-items-center group">
<span>Hybrid Search: Combining Graph and Vector Queries</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="hybrid-search-combining-graph-and-vector-queries"
aria-haspopup="dialog"
aria-label="Share link: Hybrid Search: Combining Graph and Vector Queries">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Geode’s unique advantage is combining graph topology with vector similarity:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Find</span><span class="w"> </span><span class="py">similar</span><span class="w"> </span><span class="py">products</span><span class="w"> </span><span class="py">in</span><span class="w"> </span><span class="py">the</span><span class="w"> </span><span class="py">same</span><span class="w"> </span><span class="py">category</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">category</span><span class="p">:</span><span class="nc">Category</span><span class="w"> </span><span class="p">{</span><span class="py">name</span><span class="p">:</span><span class="w"> </span><span class="err">'</span><span class="nc">Electronics</span><span class="err">'</span><span class="p">})</span><span class="err">-</span><span class="p">[:</span><span class="nc">CONTAINS</span><span class="p">]</span><span class="err">-></span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Product</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="py">p</span><span class="p">,</span><span class="w"> </span><span class="py">vector_similarity</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">embedding</span><span class="p">,</span><span class="w"> </span><span class="nv">$query_vector</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">cosine</span><span class="err">'</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">similarity</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">similarity</span><span class="w"> </span><span class="err">></span><span class="w"> </span><span class="py">0</span><span class="mf">.75</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">name</span><span class="p">,</span><span class="w"> </span><span class="py">similarity</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">similarity</span><span class="w"> </span><span class="py">DESC</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">LIMIT</span><span class="w"> </span><span class="py">5</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Collaborative</span><span class="w"> </span><span class="py">filtering</span><span class="w"> </span><span class="py">with</span><span class="w"> </span><span class="py">vector</span><span class="w"> </span><span class="py">search</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">user</span><span class="p">:</span><span class="nc">User</span><span class="w"> </span><span class="p">{</span><span class="py">id</span><span class="p">:</span><span class="w"> </span><span class="nv">$user_id</span><span class="p">})</span><span class="err">-</span><span class="p">[:</span><span class="nc">PURCHASED</span><span class="p">]</span><span class="err">-></span><span class="p">(</span><span class="nc">past</span><span class="p">:</span><span class="nc">Product</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="py">user</span><span class="p">,</span><span class="w"> </span><span class="py">collect</span><span class="p">(</span><span class="py">past</span><span class="err">.</span><span class="py">embedding</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">user_history</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">candidate</span><span class="p">:</span><span class="nc">Product</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="p">(</span><span class="py">user</span><span class="p">)</span><span class="err">-</span><span class="p">[:</span><span class="nc">PURCHASED</span><span class="p">]</span><span class="err">-></span><span class="p">(</span><span class="py">candidate</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="py">candidate</span><span class="p">,</span><span class="w"> </span><span class="py">avg</span><span class="p">([</span><span class="py">emb</span><span class="w"> </span><span class="py">IN</span><span class="w"> </span><span class="py">user_history</span><span class="w"> </span><span class="p">|</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">vector_similarity</span><span class="p">(</span><span class="py">candidate</span><span class="err">.</span><span class="py">embedding</span><span class="p">,</span><span class="w"> </span><span class="py">emb</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">cosine</span><span class="err">'</span><span class="p">)])</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">avg_similarity</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">avg_similarity</span><span class="w"> </span><span class="err">></span><span class="w"> </span><span class="py">0</span><span class="mf">.7</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">candidate</span><span class="err">.</span><span class="py">name</span><span class="p">,</span><span class="w"> </span><span class="py">avg_similarity</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">avg_similarity</span><span class="w"> </span><span class="py">DESC</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">LIMIT</span><span class="w"> </span><span class="py">10</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h3 id="use-cases-and-code-examples" class="position-relative d-flex align-items-center group">
<span>Use Cases and Code Examples</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="use-cases-and-code-examples"
aria-haspopup="dialog"
aria-label="Share link: Use Cases and Code Examples">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3>
<h4 id="use-case-1-semantic-document-search" class="position-relative d-flex align-items-center group">
<span>Use Case 1: Semantic Document Search</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="use-case-1-semantic-document-search"
aria-haspopup="dialog"
aria-label="Share link: Use Case 1: Semantic Document Search">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Store document embeddings generated from sentence transformers or OpenAI models:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl"><span class="kn">from</span> <span class="nn">geode_client</span> <span class="kn">import</span> <span class="n">Client</span>
</span></span><span class="line"><span class="cl"><span class="kn">import</span> <span class="nn">asyncio</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="k">async</span> <span class="k">def</span> <span class="nf">create_document_index</span><span class="p">():</span>
</span></span><span class="line"><span class="cl"> <span class="n">client</span> <span class="o">=</span> <span class="n">Client</span><span class="p">(</span><span class="n">host</span><span class="o">=</span><span class="s2">"localhost"</span><span class="p">,</span> <span class="n">port</span><span class="o">=</span><span class="mi">3141</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="k">async</span> <span class="k">with</span> <span class="n">client</span><span class="o">.</span><span class="n">connection</span><span class="p">()</span> <span class="k">as</span> <span class="n">conn</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="c1"># Create schema with vector index</span>
</span></span><span class="line"><span class="cl"> <span class="k">await</span> <span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="s2">"""
</span></span></span><span class="line"><span class="cl"><span class="s2"> CREATE VECTOR INDEX doc_embeddings
</span></span></span><span class="line"><span class="cl"><span class="s2"> ON Document(embedding)
</span></span></span><span class="line"><span class="cl"><span class="s2"> WITH (metric = 'cosine', dimensions = 384, m = 16);
</span></span></span><span class="line"><span class="cl"><span class="s2"> """</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="c1"># Insert documents with embeddings</span>
</span></span><span class="line"><span class="cl"> <span class="k">await</span> <span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="s2">"""
</span></span></span><span class="line"><span class="cl"><span class="s2"> CREATE (d:Document {
</span></span></span><span class="line"><span class="cl"><span class="s2"> title: 'Introduction to Graph Databases',
</span></span></span><span class="line"><span class="cl"><span class="s2"> content: 'Graph databases model data as nodes and relationships...',
</span></span></span><span class="line"><span class="cl"><span class="s2"> embedding: $embedding
</span></span></span><span class="line"><span class="cl"><span class="s2"> })
</span></span></span><span class="line"><span class="cl"><span class="s2"> """</span><span class="p">,</span> <span class="p">{</span><span class="s2">"embedding"</span><span class="p">:</span> <span class="n">generate_embedding</span><span class="p">(</span><span class="s2">"Graph databases model..."</span><span class="p">)})</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="k">async</span> <span class="k">def</span> <span class="nf">semantic_search</span><span class="p">(</span><span class="n">query_text</span><span class="p">):</span>
</span></span><span class="line"><span class="cl"> <span class="n">client</span> <span class="o">=</span> <span class="n">Client</span><span class="p">(</span><span class="n">host</span><span class="o">=</span><span class="s2">"localhost"</span><span class="p">,</span> <span class="n">port</span><span class="o">=</span><span class="mi">3141</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="k">async</span> <span class="k">with</span> <span class="n">client</span><span class="o">.</span><span class="n">connection</span><span class="p">()</span> <span class="k">as</span> <span class="n">conn</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="n">query_embedding</span> <span class="o">=</span> <span class="n">generate_embedding</span><span class="p">(</span><span class="n">query_text</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="n">result</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="k">await</span> <span class="n">conn</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="s2">"""
</span></span></span><span class="line"><span class="cl"><span class="s2"> MATCH (d:Document)
</span></span></span><span class="line"><span class="cl"><span class="s2"> WITH d, vector_similarity(d.embedding, $query_emb, 'cosine') AS score
</span></span></span><span class="line"><span class="cl"><span class="s2"> WHERE score > 0.6
</span></span></span><span class="line"><span class="cl"><span class="s2"> RETURN d.title, d.content, score
</span></span></span><span class="line"><span class="cl"><span class="s2"> ORDER BY score DESC
</span></span></span><span class="line"><span class="cl"><span class="s2"> LIMIT 5
</span></span></span><span class="line"><span class="cl"><span class="s2"> """</span><span class="p">,</span> <span class="p">{</span><span class="s2">"query_emb"</span><span class="p">:</span> <span class="n">query_embedding</span><span class="p">})</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">result</span><span class="o">.</span><span class="n">rows</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">row</span><span class="p">[</span><span class="s1">'score'</span><span class="p">]</span><span class="si">:</span><span class="s2">.3f</span><span class="si">}</span><span class="s2"> - </span><span class="si">{</span><span class="n">row</span><span class="p">[</span><span class="s1">'title'</span><span class="p">]</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
</span></span></code></pre></div>
<h4 id="use-case-2-product-recommendations-with-knowledge-graph" class="position-relative d-flex align-items-center group">
<span>Use Case 2: Product Recommendations with Knowledge Graph</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="use-case-2-product-recommendations-with-knowledge-graph"
aria-haspopup="dialog"
aria-label="Share link: Use Case 2: Product Recommendations with Knowledge Graph">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Combine product similarity with graph relationships:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Find</span><span class="w"> </span><span class="py">products</span><span class="w"> </span><span class="py">similar</span><span class="w"> </span><span class="py">to</span><span class="w"> </span><span class="py">items</span><span class="w"> </span><span class="py">in</span><span class="w"> </span><span class="py">cart</span><span class="p">,</span><span class="w"> </span><span class="py">considering</span><span class="w"> </span><span class="py">brand</span><span class="w"> </span><span class="py">preferences</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">user</span><span class="p">:</span><span class="nc">User</span><span class="w"> </span><span class="p">{</span><span class="py">id</span><span class="p">:</span><span class="w"> </span><span class="nv">$user_id</span><span class="p">})</span><span class="err">-</span><span class="p">[:</span><span class="nc">PREFERS</span><span class="p">]</span><span class="err">-></span><span class="p">(</span><span class="nc">brand</span><span class="p">:</span><span class="nc">Brand</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">brand</span><span class="p">)</span><span class="err">-</span><span class="p">[:</span><span class="nc">MANUFACTURES</span><span class="p">]</span><span class="err">-></span><span class="p">(</span><span class="py">product</span><span class="p">:</span><span class="nc">Product</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">cart_item</span><span class="p">:</span><span class="nc">Product</span><span class="w"> </span><span class="p">{</span><span class="py">id</span><span class="p">:</span><span class="w"> </span><span class="nv">$cart_item_id</span><span class="p">})</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="nc">WITH</span><span class="w"> </span><span class="py">product</span><span class="p">,</span><span class="w"> </span><span class="py">cart_item</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">vector_similarity</span><span class="p">(</span><span class="py">product</span><span class="err">.</span><span class="py">embedding</span><span class="p">,</span><span class="w"> </span><span class="py">cart_item</span><span class="err">.</span><span class="py">embedding</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">cosine</span><span class="err">'</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">similarity</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">similarity</span><span class="w"> </span><span class="err">></span><span class="w"> </span><span class="py">0</span><span class="mf">.7</span><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="py">product</span><span class="err">.</span><span class="py">id</span><span class="w"> </span><span class="err"><></span><span class="w"> </span><span class="py">cart_item</span><span class="err">.</span><span class="py">id</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">product</span><span class="err">.</span><span class="py">name</span><span class="p">,</span><span class="w"> </span><span class="py">product</span><span class="err">.</span><span class="py">price</span><span class="p">,</span><span class="w"> </span><span class="py">similarity</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">similarity</span><span class="w"> </span><span class="py">DESC</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">LIMIT</span><span class="w"> </span><span class="py">5</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h4 id="use-case-3-image-similarity-search" class="position-relative d-flex align-items-center group">
<span>Use Case 3: Image Similarity Search</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="use-case-3-image-similarity-search"
aria-haspopup="dialog"
aria-label="Share link: Use Case 3: Image Similarity Search">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Use image embeddings from models like CLIP or ResNet:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl"><span class="k">async</span> <span class="k">def</span> <span class="nf">find_similar_images</span><span class="p">(</span><span class="n">image_path</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">10</span><span class="p">):</span>
</span></span><span class="line"><span class="cl"> <span class="n">embedding</span> <span class="o">=</span> <span class="n">image_encoder</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">image_path</span><span class="p">)</span> <span class="c1"># Generate embedding</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="n">client</span> <span class="o">=</span> <span class="n">Client</span><span class="p">(</span><span class="n">host</span><span class="o">=</span><span class="s2">"localhost"</span><span class="p">,</span> <span class="n">port</span><span class="o">=</span><span class="mi">3141</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="k">async</span> <span class="k">with</span> <span class="n">client</span><span class="o">.</span><span class="n">connection</span><span class="p">()</span> <span class="k">as</span> <span class="n">conn</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="n">result</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="k">await</span> <span class="n">conn</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="s2">"""
</span></span></span><span class="line"><span class="cl"><span class="s2"> MATCH (img:Image)
</span></span></span><span class="line"><span class="cl"><span class="s2"> WITH img, vector_similarity(img.embedding, $query_emb, 'euclidean') AS distance
</span></span></span><span class="line"><span class="cl"><span class="s2"> WHERE distance < 0.5
</span></span></span><span class="line"><span class="cl"><span class="s2"> RETURN img.url, img.tags, distance
</span></span></span><span class="line"><span class="cl"><span class="s2"> ORDER BY distance ASC
</span></span></span><span class="line"><span class="cl"><span class="s2"> LIMIT $limit
</span></span></span><span class="line"><span class="cl"><span class="s2"> """</span><span class="p">,</span> <span class="p">{</span><span class="s2">"query_emb"</span><span class="p">:</span> <span class="n">embedding</span><span class="p">,</span> <span class="s2">"limit"</span><span class="p">:</span> <span class="n">limit</span><span class="p">})</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="k">return</span> <span class="n">result</span><span class="o">.</span><span class="n">bindings</span>
</span></span></code></pre></div>
<h3 id="best-practices" class="position-relative d-flex align-items-center group">
<span>Best Practices</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="best-practices"
aria-haspopup="dialog"
aria-label="Share link: Best Practices">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3>
<h4 id="choosing-index-parameters" class="position-relative d-flex align-items-center group">
<span>Choosing Index Parameters</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="choosing-index-parameters"
aria-haspopup="dialog"
aria-label="Share link: Choosing Index Parameters">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p><strong>Dimensions</strong>: Match your embedding model exactly:</p>
<ul>
<li>Sentence transformers: 384, 768, 1024</li>
<li>OpenAI ada-002: 1536</li>
<li>CLIP: 512 or 768</li>
<li>Custom models: verify output shape</li>
</ul>
<p><strong>Metric selection</strong>:</p>
<ul>
<li><strong>Cosine</strong>: Best for normalized embeddings (most common)</li>
<li><strong>Euclidean</strong>: When magnitude matters</li>
<li><strong>Dot product</strong>: For sparse vectors or specific models</li>
</ul>
<p><strong>HNSW tuning</strong>:</p>
<ul>
<li><code>m = 16</code> (default): Good balance for most cases</li>
<li><code>m = 32</code>: Higher recall, roughly 2x the HNSW graph memory overhead</li>
<li><code>ef_construction = 200</code>: Production default</li>
<li><code>ef_construction = 400</code>: Higher quality index, slower build</li>
</ul>
<h4 id="embedding-generation" class="position-relative d-flex align-items-center group">
<span>Embedding Generation</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="embedding-generation"
aria-haspopup="dialog"
aria-label="Share link: Embedding Generation">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p><strong>Consistency is critical</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl"><span class="c1"># WRONG: Different models or preprocessing</span>
</span></span><span class="line"><span class="cl"><span class="n">doc_embedding</span> <span class="o">=</span> <span class="n">model_v1</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">text</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"><span class="n">query_embedding</span> <span class="o">=</span> <span class="n">model_v2</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">query</span><span class="p">)</span> <span class="c1"># Won't match!</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c1"># RIGHT: Same model and preprocessing</span>
</span></span><span class="line"><span class="cl"><span class="k">def</span> <span class="nf">generate_embedding</span><span class="p">(</span><span class="n">text</span><span class="p">):</span>
</span></span><span class="line"><span class="cl"> <span class="n">normalized</span> <span class="o">=</span> <span class="n">text</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
</span></span><span class="line"><span class="cl"> <span class="k">return</span> <span class="n">sentence_transformer</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">normalized</span><span class="p">)</span>
</span></span></code></pre></div><p><strong>Batch processing for efficiency</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl"><span class="k">async</span> <span class="k">def</span> <span class="nf">index_documents_batch</span><span class="p">(</span><span class="n">documents</span><span class="p">,</span> <span class="n">batch_size</span><span class="o">=</span><span class="mi">100</span><span class="p">):</span>
</span></span><span class="line"><span class="cl"> <span class="n">client</span> <span class="o">=</span> <span class="n">Client</span><span class="p">(</span><span class="n">host</span><span class="o">=</span><span class="s2">"localhost"</span><span class="p">,</span> <span class="n">port</span><span class="o">=</span><span class="mi">3141</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="k">async</span> <span class="k">with</span> <span class="n">client</span><span class="o">.</span><span class="n">connection</span><span class="p">()</span> <span class="k">as</span> <span class="n">conn</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">documents</span><span class="p">),</span> <span class="n">batch_size</span><span class="p">):</span>
</span></span><span class="line"><span class="cl"> <span class="n">batch</span> <span class="o">=</span> <span class="n">documents</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">i</span> <span class="o">+</span> <span class="n">batch_size</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"> <span class="n">embeddings</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">encode</span><span class="p">([</span><span class="n">d</span><span class="o">.</span><span class="n">text</span> <span class="k">for</span> <span class="n">d</span> <span class="ow">in</span> <span class="n">batch</span><span class="p">])</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="k">for</span> <span class="n">doc</span><span class="p">,</span> <span class="n">emb</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">batch</span><span class="p">,</span> <span class="n">embeddings</span><span class="p">):</span>
</span></span><span class="line"><span class="cl"> <span class="k">await</span> <span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="s2">"""
</span></span></span><span class="line"><span class="cl"><span class="s2"> CREATE (d:Document {
</span></span></span><span class="line"><span class="cl"><span class="s2"> id: $id,
</span></span></span><span class="line"><span class="cl"><span class="s2"> text: $text,
</span></span></span><span class="line"><span class="cl"><span class="s2"> embedding: $emb
</span></span></span><span class="line"><span class="cl"><span class="s2"> })
</span></span></span><span class="line"><span class="cl"><span class="s2"> """</span><span class="p">,</span> <span class="p">{</span><span class="s2">"id"</span><span class="p">:</span> <span class="n">doc</span><span class="o">.</span><span class="n">id</span><span class="p">,</span> <span class="s2">"text"</span><span class="p">:</span> <span class="n">doc</span><span class="o">.</span><span class="n">text</span><span class="p">,</span> <span class="s2">"emb"</span><span class="p">:</span> <span class="n">emb</span><span class="o">.</span><span class="n">tolist</span><span class="p">()})</span>
</span></span></code></pre></div>
<h4 id="query-optimization" class="position-relative d-flex align-items-center group">
<span>Query Optimization</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="query-optimization"
aria-haspopup="dialog"
aria-label="Share link: Query Optimization">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p><strong>Use appropriate similarity thresholds</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Too</span><span class="w"> </span><span class="py">restrictive</span><span class="p">:</span><span class="w"> </span><span class="nc">May</span><span class="w"> </span><span class="py">return</span><span class="w"> </span><span class="py">no</span><span class="w"> </span><span class="py">results</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">vector_similarity</span><span class="p">(</span><span class="py">n</span><span class="err">.</span><span class="py">emb</span><span class="p">,</span><span class="w"> </span><span class="nv">$query</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">cosine</span><span class="err">'</span><span class="p">)</span><span class="w"> </span><span class="err">></span><span class="w"> </span><span class="py">0</span><span class="mf">.95</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Too</span><span class="w"> </span><span class="py">permissive</span><span class="p">:</span><span class="w"> </span><span class="nc">Returns</span><span class="w"> </span><span class="py">irrelevant</span><span class="w"> </span><span class="py">results</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">vector_similarity</span><span class="p">(</span><span class="py">n</span><span class="err">.</span><span class="py">emb</span><span class="p">,</span><span class="w"> </span><span class="nv">$query</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">cosine</span><span class="err">'</span><span class="p">)</span><span class="w"> </span><span class="err">></span><span class="w"> </span><span class="py">0</span><span class="mf">.3</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Just</span><span class="w"> </span><span class="py">right</span><span class="p">:</span><span class="w"> </span><span class="nc">Adjust</span><span class="w"> </span><span class="py">based</span><span class="w"> </span><span class="kd">on</span><span class="w"> </span><span class="py">your</span><span class="w"> </span><span class="py">data</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">vector_similarity</span><span class="p">(</span><span class="py">n</span><span class="err">.</span><span class="py">emb</span><span class="p">,</span><span class="w"> </span><span class="nv">$query</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">cosine</span><span class="err">'</span><span class="p">)</span><span class="w"> </span><span class="err">></span><span class="w"> </span><span class="py">0</span><span class="mf">.7</span><span class="w">
</span></span></span></code></pre></div><p><strong>Limit result sets</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">HNSW</span><span class="w"> </span><span class="py">is</span><span class="w"> </span><span class="py">optimized</span><span class="w"> </span><span class="py">for</span><span class="w"> </span><span class="py">top</span><span class="err">-</span><span class="py">k</span><span class="w"> </span><span class="py">queries</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">d</span><span class="p">:</span><span class="nc">Document</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="py">d</span><span class="p">,</span><span class="w"> </span><span class="py">vector_similarity</span><span class="p">(</span><span class="py">d</span><span class="err">.</span><span class="py">embedding</span><span class="p">,</span><span class="w"> </span><span class="nv">$query</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">cosine</span><span class="err">'</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">score</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">score</span><span class="w"> </span><span class="py">DESC</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">LIMIT</span><span class="w"> </span><span class="py">20</span><span class="w"> </span><span class="err">--</span><span class="w"> </span><span class="py">HNSW</span><span class="w"> </span><span class="py">explores</span><span class="w"> </span><span class="kd">on</span><span class="py">ly</span><span class="w"> </span><span class="py">as</span><span class="w"> </span><span class="py">needed</span><span class="w">
</span></span></span></code></pre></div>
<h3 id="performance-considerations" class="position-relative d-flex align-items-center group">
<span>Performance Considerations</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="performance-considerations"
aria-haspopup="dialog"
aria-label="Share link: Performance Considerations">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3>
<h4 id="indexing-performance" class="position-relative d-flex align-items-center group">
<span>Indexing Performance</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="indexing-performance"
aria-haspopup="dialog"
aria-label="Share link: Indexing Performance">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p><strong>Build time scales with dataset size</strong>:</p>
<ul>
<li>100K vectors: ~1-2 minutes</li>
<li>1M vectors: ~15-30 minutes</li>
<li>10M vectors: ~3-5 hours</li>
</ul>
<p><strong>Memory requirements</strong>:</p>
<ul>
<li>Base: <code>num_vectors * dimensions * 4 bytes</code> (float32)</li>
<li>HNSW overhead: <code>num_vectors * m * 16 * 4 bytes</code></li>
<li>Example: 1M vectors × 768 dimensions with <code>m = 16</code>: ~3.1 GB base + ~1.0 GB HNSW overhead ≈ 4.1 GB RAM</li>
</ul>
<p><strong>Incremental indexing</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Create</span><span class="w"> </span><span class="py">index</span><span class="w"> </span><span class="py">first</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">CREATE</span><span class="w"> </span><span class="py">VECTOR</span><span class="w"> </span><span class="py">INDEX</span><span class="w"> </span><span class="py">CONCURRENTLY</span><span class="w"> </span><span class="py">product_embeddings</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ON</span><span class="w"> </span><span class="py">Product</span><span class="p">(</span><span class="py">embedding</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="p">(</span><span class="py">metric</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="err">'</span><span class="py">cosine</span><span class="err">'</span><span class="p">,</span><span class="w"> </span><span class="py">dimensions</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">768</span><span class="p">)</span><span class="err">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Insert</span><span class="w"> </span><span class="py">nodes</span><span class="w"> </span><span class="py">normally</span><span class="err">;</span><span class="w"> </span><span class="py">index</span><span class="w"> </span><span class="py">updates</span><span class="w"> </span><span class="py">incrementally</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">CREATE</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Product</span><span class="w"> </span><span class="p">{</span><span class="py">name</span><span class="p">:</span><span class="w"> </span><span class="err">'</span><span class="nc">New</span><span class="w"> </span><span class="py">Item</span><span class="err">'</span><span class="p">,</span><span class="w"> </span><span class="py">embedding</span><span class="p">:</span><span class="w"> </span><span class="nv">$emb</span><span class="p">})</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h4 id="query-performance" class="position-relative d-flex align-items-center group">
<span>Query Performance</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="query-performance"
aria-haspopup="dialog"
aria-label="Share link: Query Performance">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p><strong>Typical latency</strong> (10k vectors, 10-NN):</p>
<ul>
<li>Single vector search: 1-5ms at ~90% recall</li>
<li>Combined graph + vector: workload-dependent (varies by traversal and filters)</li>
<li>Batch queries: throughput depends on workload and hardware</li>
</ul>
<p><strong>Tuning runtime accuracy</strong> (not yet exposed, coming soon):</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Higher</span><span class="w"> </span><span class="py">ef_search</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">more</span><span class="w"> </span><span class="py">accurate</span><span class="p">,</span><span class="w"> </span><span class="py">slower</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">SET</span><span class="w"> </span><span class="py">vector_search_ef</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">100</span><span class="err">;</span><span class="w"> </span><span class="err">--</span><span class="w"> </span><span class="py">Default</span><span class="p">:</span><span class="w"> </span><span class="nc">50</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">d</span><span class="p">:</span><span class="nc">Document</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="py">d</span><span class="p">,</span><span class="w"> </span><span class="py">vector_similarity</span><span class="p">(</span><span class="py">d</span><span class="err">.</span><span class="py">embedding</span><span class="p">,</span><span class="w"> </span><span class="nv">$query</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">cosine</span><span class="err">'</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">score</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">score</span><span class="w"> </span><span class="py">DESC</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">LIMIT</span><span class="w"> </span><span class="py">10</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h4 id="scaling-vector-search" class="position-relative d-flex align-items-center group">
<span>Scaling Vector Search</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="scaling-vector-search"
aria-haspopup="dialog"
aria-label="Share link: Scaling Vector Search">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p><strong>Horizontal scaling</strong>:</p>
<ul>
<li>Partition large datasets by category or domain</li>
<li>Use graph structure to route queries to relevant partitions</li>
<li>Combine results from distributed searches</li>
</ul>
<p><strong>Caching strategies</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl"><span class="c1"># Cache frequently queried embeddings</span>
</span></span><span class="line"><span class="cl"><span class="n">embedding_cache</span> <span class="o">=</span> <span class="p">{}</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="k">async</span> <span class="k">def</span> <span class="nf">cached_search</span><span class="p">(</span><span class="n">query_text</span><span class="p">):</span>
</span></span><span class="line"><span class="cl">    <span class="n">cache_key</span> <span class="o">=</span> <span class="nb">hash</span><span class="p">(</span><span class="n">query_text</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">    <span class="k">if</span> <span class="n">cache_key</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">embedding_cache</span><span class="p">:</span>
</span></span><span class="line"><span class="cl">        <span class="n">embedding_cache</span><span class="p">[</span><span class="n">cache_key</span><span class="p">]</span> <span class="o">=</span> <span class="n">generate_embedding</span><span class="p">(</span><span class="n">query_text</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl">    <span class="k">return</span> <span class="k">await</span> <span class="n">search_by_vector</span><span class="p">(</span><span class="n">embedding_cache</span><span class="p">[</span><span class="n">cache_key</span><span class="p">])</span>
</span></span></code></pre></div>
<h3 id="troubleshooting" class="position-relative d-flex align-items-center group">
<span>Troubleshooting</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="troubleshooting"
aria-haspopup="dialog"
aria-label="Share link: Troubleshooting">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3>
<h4 id="poor-search-quality" class="position-relative d-flex align-items-center group">
<span>Poor Search Quality</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="poor-search-quality"
aria-haspopup="dialog"
aria-label="Share link: Poor Search Quality">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p><strong>Problem</strong>: Results aren’t relevant</p>
<p><strong>Solutions</strong>:</p>
<ol>
<li>Verify embedding model consistency</li>
<li>Check vector normalization (cosine requires normalized vectors)</li>
<li>Adjust similarity threshold</li>
<li>Retrain or upgrade embedding model</li>
</ol>
<p><strong>Problem</strong>: Slow query performance</p>
<p><strong>Solutions</strong>:</p>
<ol>
<li>Increase <code>m</code> parameter (rebuild index)</li>
<li>Add filters before vector search to reduce candidate set</li>
<li>Use EXPLAIN to identify bottlenecks</li>
<li>Consider partitioning large datasets</li>
</ol>
<p><strong>Problem</strong>: High memory usage</p>
<p><strong>Solutions</strong>:</p>
<ol>
<li>Reduce <code>m</code> parameter (less accuracy, less memory)</li>
<li>Use lower-dimensional embeddings if possible</li>
<li>Partition data across multiple nodes</li>
<li>Use dimensionality reduction (PCA, UMAP)</li>
</ol>
<h4 id="index-maintenance" class="position-relative d-flex align-items-center group">
<span>Index Maintenance</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="index-maintenance"
aria-haspopup="dialog"
aria-label="Share link: Index Maintenance">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p><strong>Monitoring index health</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="py">SHOW</span><span class="w"> </span><span class="py">INDEXES</span><span class="w"> </span><span class="py">WHERE</span><span class="w"> </span><span class="py">name</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="err">'</span><span class="py">product_embeddings</span><span class="err">';</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Returns</span><span class="p">:</span><span class="w"> </span><span class="nc">size</span><span class="p">,</span><span class="w"> </span><span class="py">num_vectors</span><span class="p">,</span><span class="w"> </span><span class="py">build_status</span><span class="w">
</span></span></span></code></pre></div><p><strong>Rebuilding indexes</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">If</span><span class="w"> </span><span class="py">index</span><span class="w"> </span><span class="py">becomes</span><span class="w"> </span><span class="py">corrupted</span><span class="w"> </span><span class="py">or</span><span class="w"> </span><span class="py">parameters</span><span class="w"> </span><span class="py">need</span><span class="w"> </span><span class="py">changing</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">DROP</span><span class="w"> </span><span class="py">INDEX</span><span class="w"> </span><span class="py">product_embeddings</span><span class="err">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">CREATE</span><span class="w"> </span><span class="py">VECTOR</span><span class="w"> </span><span class="py">INDEX</span><span class="w"> </span><span class="py">product_embeddings</span><span class="w"> </span><span class="py">ON</span><span class="w"> </span><span class="py">Product</span><span class="p">(</span><span class="py">embedding</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="p">(</span><span class="py">metric</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="err">'</span><span class="py">cosine</span><span class="err">'</span><span class="p">,</span><span class="w"> </span><span class="py">dimensions</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">768</span><span class="p">,</span><span class="w"> </span><span class="py">m</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">32</span><span class="p">)</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h3 id="related-topics" class="position-relative d-flex align-items-center group">
<span>Related Topics</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="related-topics"
aria-haspopup="dialog"
aria-label="Share link: Related Topics">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3><ul>
<li><strong><a
href="/tags/hnsw/"
>HNSW</a>
</strong>: Deep dive into Hierarchical Navigable Small World algorithm</li>
<li><strong><a
href="/tags/machine-learning/"
>Machine Learning</a>
</strong>: ML integration patterns with Geode</li>
<li><strong><a
href="/tags/embeddings/"
>Embeddings</a>
</strong>: Best practices for generating and storing embeddings</li>
<li><strong><a
href="/tags/performance/"
>Performance</a>
</strong>: General performance optimization techniques</li>
<li><strong><a
href="/tags/indexing/"
>Indexing</a>
</strong>: Overview of all index types in Geode</li>
<li><strong><a
href="/tags/recommendations/"
>Recommendations</a>
</strong>: Building recommendation systems</li>
</ul>
<h3 id="further-reading" class="position-relative d-flex align-items-center group">
<span>Further Reading</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="further-reading"
aria-haspopup="dialog"
aria-label="Share link: Further Reading">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3><ul>
<li><strong>HNSW Paper</strong>: “Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs” (Malkov &amp; Yashunin, 2018)</li>
<li><strong>Sentence Transformers</strong>: <a
href="https://www.sbert.net/"
aria-label="https://www.sbert.net/ – opens in new window"
target="_blank" rel="noopener noreferrer"
>https://www.sbert.net/
<span aria-hidden="true" class="external-icon">↗</span>
</a>
- Popular embedding models</li>
<li><strong>OpenAI Embeddings</strong>: <a
href="https://platform.openai.com/docs/guides/embeddings"
aria-label="https://platform.openai.com/docs/guides/embeddings – opens in new window"
target="_blank" rel="noopener noreferrer"
>https://platform.openai.com/docs/guides/embeddings
<span aria-hidden="true" class="external-icon">↗</span>
</a>
</li>
<li><strong>Geode Vector Search Guide</strong>: <code>/docs/advanced-features/vector-search/</code></li>
<li><strong>Performance Tuning</strong>: <code>/docs/performance/vector-optimization/</code></li>
</ul>
<h3 id="advanced-vector-search-techniques" class="position-relative d-flex align-items-center group">
<span>Advanced Vector Search Techniques</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="advanced-vector-search-techniques"
aria-haspopup="dialog"
aria-label="Share link: Advanced Vector Search Techniques">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3>
<h4 id="hybrid-dense-sparse-search" class="position-relative d-flex align-items-center group">
<span>Hybrid Dense-Sparse Search</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="hybrid-dense-sparse-search"
aria-haspopup="dialog"
aria-label="Share link: Hybrid Dense-Sparse Search">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Combine vector similarity with keyword matching:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Hybrid</span><span class="w"> </span><span class="py">search</span><span class="p">:</span><span class="w"> </span><span class="nc">HNSW</span><span class="w"> </span><span class="err">+</span><span class="w"> </span><span class="py">BM25</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">d</span><span class="p">:</span><span class="nc">Document</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">text_search</span><span class="p">(</span><span class="py">d</span><span class="err">.</span><span class="py">content</span><span class="p">,</span><span class="w"> </span><span class="nv">$keyword_query</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="py">vector_similarity</span><span class="p">(</span><span class="py">d</span><span class="err">.</span><span class="py">embedding</span><span class="p">,</span><span class="w"> </span><span class="nv">$vector_query</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">cosine</span><span class="err">'</span><span class="p">)</span><span class="w"> </span><span class="err">></span><span class="w"> </span><span class="py">0</span><span class="mf">.6</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="py">d</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">text_score</span><span class="p">(</span><span class="py">d</span><span class="p">,</span><span class="w"> </span><span class="nv">$keyword_query</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">bm25_score</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">vector_similarity</span><span class="p">(</span><span class="py">d</span><span class="err">.</span><span class="py">embedding</span><span class="p">,</span><span class="w"> </span><span class="nv">$vector_query</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">cosine</span><span class="err">'</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">vector_score</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">d</span><span class="err">.</span><span class="py">doc_id</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">d</span><span class="err">.</span><span class="py">title</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">bm25_score</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">vector_score</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">0</span><span class="mf">.5</span><span class="w"> </span><span class="err">*</span><span class="w"> </span><span class="py">bm25_score</span><span class="w"> </span><span class="err">+</span><span class="w"> </span><span class="py">0</span><span class="mf">.5</span><span class="w"> </span><span class="err">*</span><span class="w"> </span><span class="py">vector_score</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">hybrid_score</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">hybrid_score</span><span class="w"> </span><span class="py">DESC</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">LIMIT</span><span class="w"> </span><span class="py">20</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h4 id="multi-vector-search" class="position-relative d-flex align-items-center group">
<span>Multi-Vector Search</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="multi-vector-search"
aria-haspopup="dialog"
aria-label="Share link: Multi-Vector Search">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Search across multiple embedding spaces:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Search</span><span class="w"> </span><span class="py">using</span><span class="w"> </span><span class="py">both</span><span class="w"> </span><span class="py">content</span><span class="w"> </span><span class="py">and</span><span class="w"> </span><span class="py">title</span><span class="w"> </span><span class="py">embeddings</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">d</span><span class="p">:</span><span class="nc">Document</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="py">d</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">vector_similarity</span><span class="p">(</span><span class="py">d</span><span class="err">.</span><span class="py">content_embedding</span><span class="p">,</span><span class="w"> </span><span class="nv">$content_query_emb</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">cosine</span><span class="err">'</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">content_sim</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">vector_similarity</span><span class="p">(</span><span class="py">d</span><span class="err">.</span><span class="py">title_embedding</span><span class="p">,</span><span class="w"> </span><span class="nv">$title_query_emb</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">cosine</span><span class="err">'</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">title_sim</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="py">d</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">0</span><span class="mf">.7</span><span class="w"> </span><span class="err">*</span><span class="w"> </span><span class="py">content_sim</span><span class="w"> </span><span class="err">+</span><span class="w"> </span><span class="py">0</span><span class="mf">.3</span><span class="w"> </span><span class="err">*</span><span class="w"> </span><span class="py">title_sim</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">combined_similarity</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">combined_similarity</span><span class="w"> </span><span class="err">></span><span class="w"> </span><span class="py">0</span><span class="mf">.75</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">d</span><span class="err">.</span><span class="py">doc_id</span><span class="p">,</span><span class="w"> </span><span class="py">d</span><span class="err">.</span><span class="py">title</span><span class="p">,</span><span class="w"> </span><span class="py">combined_similarity</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">combined_similarity</span><span class="w"> </span><span class="py">DESC</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h3 id="query-time-optimizations" class="position-relative d-flex align-items-center group">
<span>Query-Time Optimizations</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="query-time-optimizations"
aria-haspopup="dialog"
aria-label="Share link: Query-Time Optimizations">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3>
<h4 id="pre-filtering-vs-post-filtering" class="position-relative d-flex align-items-center group">
<span>Pre-Filtering vs Post-Filtering</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="pre-filtering-vs-post-filtering"
aria-haspopup="dialog"
aria-label="Share link: Pre-Filtering vs Post-Filtering">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Efficient</span><span class="p">:</span><span class="w"> </span><span class="nc">Pre</span><span class="err">-</span><span class="py">filter</span><span class="w"> </span><span class="py">then</span><span class="w"> </span><span class="py">vector</span><span class="w"> </span><span class="py">search</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">d</span><span class="p">:</span><span class="nc">Document</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">d</span><span class="err">.</span><span class="py">category</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="err">'</span><span class="py">technical</span><span class="err">'</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="py">d</span><span class="err">.</span><span class="py">publish_date</span><span class="w"> </span><span class="err">></span><span class="w"> </span><span class="py">date</span><span class="p">(</span><span class="err">'</span><span class="py">2024</span><span class="err">-</span><span class="py">01</span><span class="err">-</span><span class="py">01</span><span class="err">'</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="py">d</span><span class="err">.</span><span class="py">language</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="err">'</span><span class="py">en</span><span class="err">'</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="py">d</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">vector_similarity</span><span class="p">(</span><span class="py">d</span><span class="err">.</span><span class="py">embedding</span><span class="p">,</span><span class="w"> </span><span class="nv">$query</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">cosine</span><span class="err">'</span><span class="p">)</span><span class="w"> </span><span class="err">></span><span class="w"> </span><span class="py">0</span><span class="mf">.7</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">d</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">vector_similarity</span><span class="p">(</span><span class="py">d</span><span class="err">.</span><span class="py">embedding</span><span class="p">,</span><span class="w"> </span><span class="nv">$query</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">cosine</span><span class="err">'</span><span class="p">)</span><span class="w"> </span><span class="py">DESC</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">LIMIT</span><span class="w"> </span><span class="py">10</span><span class="err">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Less</span><span class="w"> </span><span class="py">efficient</span><span class="p">:</span><span class="w"> </span><span class="nc">Vector</span><span class="w"> </span><span class="py">search</span><span class="w"> </span><span class="py">then</span><span class="w"> </span><span class="py">filter</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">CALL</span><span class="w"> </span><span class="py">vector</span><span class="err">.</span><span class="py">search</span><span class="p">({</span><span class="py">index</span><span class="p">:</span><span class="w"> </span><span class="err">'</span><span class="nc">docs</span><span class="err">'</span><span class="p">,</span><span class="w"> </span><span class="kd">query</span><span class="p">:</span><span class="w"> </span><span class="nv">$query</span><span class="p">,</span><span class="w"> </span><span class="nc">k</span><span class="p">:</span><span class="w"> </span><span class="nc">1000</span><span class="p">})</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="nc">YIELD</span><span class="w"> </span><span class="py">node</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">node</span><span class="err">.</span><span class="py">category</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="err">'</span><span class="py">technical</span><span class="err">'</span><span class="w"> </span><span class="err">//</span><span class="w"> </span><span class="py">Post</span><span class="err">-</span><span class="py">filter</span><span class="w"> </span><span class="py">loses</span><span class="w"> </span><span class="py">HNSW</span><span class="w"> </span><span class="py">efficiency</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">node</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">LIMIT</span><span class="w"> </span><span class="py">10</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h4 id="cascaded-search" class="position-relative d-flex align-items-center group">
<span>Cascaded Search</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="cascaded-search"
aria-haspopup="dialog"
aria-label="Share link: Cascaded Search">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Fast approximate search followed by reranking:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Stage</span><span class="w"> </span><span class="py">1</span><span class="p">:</span><span class="w"> </span><span class="nc">Fast</span><span class="w"> </span><span class="py">approximate</span><span class="w"> </span><span class="py">retrieval</span><span class="w"> </span><span class="p">(</span><span class="py">top</span><span class="w"> </span><span class="py">100</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">CALL</span><span class="w"> </span><span class="py">vector</span><span class="err">.</span><span class="py">search</span><span class="p">({</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">index</span><span class="p">:</span><span class="w"> </span><span class="err">'</span><span class="nc">products</span><span class="err">'</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="kd">query</span><span class="p">:</span><span class="w"> </span><span class="nv">$query_embedding</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nc">k</span><span class="p">:</span><span class="w"> </span><span class="nc">100</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nc">ef</span><span class="p">:</span><span class="w"> </span><span class="nc">50</span><span class="w"> </span><span class="err">//</span><span class="w"> </span><span class="py">Lower</span><span class="w"> </span><span class="py">ef</span><span class="w"> </span><span class="py">for</span><span class="w"> </span><span class="py">speed</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="p">})</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">YIELD</span><span class="w"> </span><span class="py">node</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">candidate</span><span class="p">,</span><span class="w"> </span><span class="py">similarity</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">approx_score</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Stage</span><span class="w"> </span><span class="py">2</span><span class="p">:</span><span class="w"> </span><span class="nc">Precise</span><span class="w"> </span><span class="py">reranking</span><span class="w"> </span><span class="p">(</span><span class="py">top</span><span class="w"> </span><span class="py">20</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="py">candidate</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">vector</span><span class="err">.</span><span class="py">similarity</span><span class="p">(</span><span class="py">candidate</span><span class="err">.</span><span class="py">high_quality_embedding</span><span class="p">,</span><span class="w"> </span><span class="nv">$query_embedding</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">cosine</span><span class="err">'</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">precise_score</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">precise_score</span><span class="w"> </span><span class="py">DESC</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">LIMIT</span><span class="w"> </span><span class="py">20</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">candidate</span><span class="p">,</span><span class="w"> </span><span class="py">precise_score</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h3 id="approximate-nearest-neighbors-ann-tuning" class="position-relative d-flex align-items-center group">
<span>Approximate Nearest Neighbors (ANN) Tuning</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="approximate-nearest-neighbors-ann-tuning"
aria-haspopup="dialog"
aria-label="Share link: Approximate Nearest Neighbors (ANN) Tuning">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3>
<h4 id="hnsw-parameter-impact" class="position-relative d-flex align-items-center group">
<span>HNSW Parameter Impact</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="hnsw-parameter-impact"
aria-haspopup="dialog"
aria-label="Share link: HNSW Parameter Impact">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p><strong>M (connections per layer)</strong>:</p>
<ul>
<li>M=4: ~10MB/million vectors, 85% recall</li>
<li>M=16: ~40MB/million vectors, 95% recall</li>
<li>M=32: ~80MB/million vectors, 98% recall</li>
</ul>
<p><strong>ef_construction</strong>:</p>
<ul>
<li>ef_construction=100: Fast index build, 90% quality</li>
<li>ef_construction=200: Balanced (recommended)</li>
<li>ef_construction=400: Slow build, 98% quality</li>
</ul>
<p><strong>ef_search</strong> (query-time):</p>
<ul>
<li>ef_search=16: &lt;1ms latency, 85% recall</li>
<li>ef_search=64: ~2ms latency, 95% recall</li>
<li>ef_search=256: ~10ms latency, 99% recall</li>
</ul>
<h4 id="dynamic-ef_search-tuning" class="position-relative d-flex align-items-center group">
<span>Dynamic ef_search Tuning</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="dynamic-ef_search-tuning"
aria-haspopup="dialog"
aria-label="Share link: Dynamic ef_search Tuning">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Adjust</span><span class="w"> </span><span class="py">ef_search</span><span class="w"> </span><span class="py">based</span><span class="w"> </span><span class="kd">on</span><span class="w"> </span><span class="kd">query</span><span class="w"> </span><span class="nc">importance</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">CALL</span><span class="w"> </span><span class="py">vector</span><span class="err">.</span><span class="py">search</span><span class="p">({</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">index</span><span class="p">:</span><span class="w"> </span><span class="err">'</span><span class="nc">embeddings</span><span class="err">'</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="kd">query</span><span class="p">:</span><span class="w"> </span><span class="nv">$query</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nc">k</span><span class="p">:</span><span class="w"> </span><span class="nc">10</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nc">ef</span><span class="p">:</span><span class="w"> </span><span class="nc">CASE</span><span class="w"> </span><span class="py">WHEN</span><span class="w"> </span><span class="nv">$user_tier</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="err">'</span><span class="py">premium</span><span class="err">'</span><span class="w"> </span><span class="py">THEN</span><span class="w"> </span><span class="py">200</span><span class="w"> </span><span class="py">ELSE</span><span class="w"> </span><span class="py">50</span><span class="w"> </span><span class="py">END</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="p">})</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">YIELD</span><span class="w"> </span><span class="py">node</span><span class="p">,</span><span class="w"> </span><span class="py">similarity</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">node</span><span class="p">,</span><span class="w"> </span><span class="py">similarity</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h3 id="quantization-and-compression" class="position-relative d-flex align-items-center group">
<span>Quantization and Compression</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="quantization-and-compression"
aria-haspopup="dialog"
aria-label="Share link: Quantization and Compression">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3>
<h4 id="scalar-quantization" class="position-relative d-flex align-items-center group">
<span>Scalar Quantization</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="scalar-quantization"
aria-haspopup="dialog"
aria-label="Share link: Scalar Quantization">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Reduce memory by 4x with minimal accuracy loss:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl"><span class="c1"># Quantize float32 to uint8</span>
</span></span><span class="line"><span class="cl"><span class="k">def</span> <span class="nf">quantize_embeddings</span><span class="p">(</span><span class="n">embeddings</span><span class="p">):</span>
</span></span><span class="line"><span class="cl"> <span class="c1"># Find min/max for normalization</span>
</span></span><span class="line"><span class="cl"> <span class="n">min_val</span><span class="p">,</span> <span class="n">max_val</span> <span class="o">=</span> <span class="n">embeddings</span><span class="o">.</span><span class="n">min</span><span class="p">(),</span> <span class="n">embeddings</span><span class="o">.</span><span class="n">max</span><span class="p">()</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="c1"># Scale to [0, 255]</span>
</span></span><span class="line"><span class="cl"> <span class="n">quantized</span> <span class="o">=</span> <span class="p">((</span><span class="n">embeddings</span> <span class="o">-</span> <span class="n">min_val</span><span class="p">)</span> <span class="o">/</span> <span class="p">(</span><span class="n">max_val</span> <span class="o">-</span> <span class="n">min_val</span><span class="p">)</span> <span class="o">*</span> <span class="mi">255</span><span class="p">)</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">uint8</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="k">return</span> <span class="n">quantized</span><span class="p">,</span> <span class="n">min_val</span><span class="p">,</span> <span class="n">max_val</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c1"># Store quantized embeddings</span>
</span></span><span class="line"><span class="cl"><span class="k">await</span> <span class="n">client</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="s2">"""
</span></span></span><span class="line"><span class="cl"><span class="s2"> MATCH (d:Document {doc_id: $id})
</span></span></span><span class="line"><span class="cl"><span class="s2"> SET d.embedding_quantized = $quantized,
</span></span></span><span class="line"><span class="cl"><span class="s2"> d.quantization_min = $min_val,
</span></span></span><span class="line"><span class="cl"><span class="s2"> d.quantization_max = $max_val
</span></span></span><span class="line"><span class="cl"><span class="s2">"""</span><span class="p">,</span> <span class="p">{</span><span class="s2">"id"</span><span class="p">:</span> <span class="n">doc_id</span><span class="p">,</span> <span class="s2">"quantized"</span><span class="p">:</span> <span class="n">quantized</span><span class="o">.</span><span class="n">tolist</span><span class="p">(),</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"min_val"</span><span class="p">:</span> <span class="n">min_val</span><span class="p">,</span> <span class="s2">"max_val"</span><span class="p">:</span> <span class="n">max_val</span><span class="p">})</span>
</span></span></code></pre></div>
<h4 id="product-quantization-pq" class="position-relative d-flex align-items-center group">
<span>Product Quantization (PQ)</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="product-quantization-pq"
aria-haspopup="dialog"
aria-label="Share link: Product Quantization (PQ)">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Compress each 1536-dimensional embedding to 48 bytes (48 subquantizers × 8 bits):</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl"><span class="c1"># Use Faiss for product quantization</span>
</span></span><span class="line"><span class="cl"><span class="kn">import</span> <span class="nn">faiss</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c1"># Train PQ codec</span>
</span></span><span class="line"><span class="cl"><span class="n">d</span> <span class="o">=</span> <span class="mi">1536</span> <span class="c1"># Original dimension</span>
</span></span><span class="line"><span class="cl"><span class="n">m</span> <span class="o">=</span> <span class="mi">48</span> <span class="c1"># Number of subquantizers</span>
</span></span><span class="line"><span class="cl"><span class="n">nbits</span> <span class="o">=</span> <span class="mi">8</span> <span class="c1"># Bits per code</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="n">pq</span> <span class="o">=</span> <span class="n">faiss</span><span class="o">.</span><span class="n">IndexPQ</span><span class="p">(</span><span class="n">d</span><span class="p">,</span> <span class="n">m</span><span class="p">,</span> <span class="n">nbits</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"><span class="n">pq</span><span class="o">.</span><span class="n">train</span><span class="p">(</span><span class="n">training_embeddings</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c1"># Encode embeddings</span>
</span></span><span class="line"><span class="cl"><span class="n">codes</span> <span class="o">=</span> <span class="n">pq</span><span class="o">.</span><span class="n">sa_encode</span><span class="p">(</span><span class="n">embeddings</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c1"># Store compressed codes</span>
</span></span><span class="line"><span class="cl"><span class="k">await</span> <span class="n">client</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="s2">"""
</span></span></span><span class="line"><span class="cl"><span class="s2"> MATCH (d:Document {doc_id: $id})
</span></span></span><span class="line"><span class="cl"><span class="s2"> SET d.embedding_pq = $codes
</span></span></span><span class="line"><span class="cl"><span class="s2">"""</span><span class="p">,</span> <span class="p">{</span><span class="s2">"id"</span><span class="p">:</span> <span class="n">doc_id</span><span class="p">,</span> <span class="s2">"codes"</span><span class="p">:</span> <span class="n">codes</span><span class="o">.</span><span class="n">tolist</span><span class="p">()})</span>
</span></span></code></pre></div>
<h3 id="further-reading-1" class="position-relative d-flex align-items-center group">
<span>Further Reading</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="further-reading-1"
aria-haspopup="dialog"
aria-label="Share link: Further Reading">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3><ul>
<li><strong>Vector Search</strong>: HNSW, LSH, and IVF Algorithms</li>
<li><strong>Hybrid Search</strong>: Combining Dense and Sparse Retrieval</li>
<li><strong>Quantization</strong>: Scalar, Product, and Binary Quantization</li>
<li><strong>Performance</strong>: Benchmarking and Optimization Techniques</li>
</ul>
<p>Browse tagged content for comprehensive vector search documentation.</p>
Related Articles
Graph Algorithms and Analytics
Run graph algorithms with real GQL examples, generate embeddings (Node2Vec/GraphSAGE/DeepWalk), and use vector similarity for analytics workloads
Recommendation Systems
Build personalized recommendation engines using collaborative filtering, graph embeddings, and vector similarity search with Geode