<!-- CANARY: REQ=REQ-DOCS-001; FEATURE="Docs"; ASPECT=Documentation; STATUS=TESTED; OWNER=docs; UPDATED=2026-01-28 -->
<h2 id="high-availability-in-geode" class="position-relative d-flex align-items-center group">
<span>High Availability in Geode</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="high-availability-in-geode"
aria-haspopup="dialog"
aria-label="Share link: High Availability in Geode">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h2>
<!--
  Shared "Share this section" dialog. A single instance serves every .h-share
  heading button on the page: the inline script fills in the permalink field
  and the social share hrefs, then removes the `hidden` attribute.
  Icons are decorative (each control has a text label or aria-label), so they
  are hidden from assistive technology.
-->
<div id="headingShareModal" class="heading-share-modal" role="dialog" aria-modal="true" aria-labelledby="headingShareTitle" hidden>
  <div class="hsm-dialog" role="document">
    <div class="hsm-header">
      <h2 id="headingShareTitle" class="h6 mb-0 fw-bold">Share this section</h2>
      <button type="button" class="hsm-close" aria-label="Close">
        <i class="fa-solid fa-xmark" aria-hidden="true"></i>
      </button>
    </div>
    <div class="hsm-body">
      <label for="headingShareInput" class="form-label small text-muted mb-1 text-uppercase fw-bold" style="font-size: 0.7rem; letter-spacing: 0.5px;">Permalink</label>
      <div class="input-group mb-4 hsm-url-group">
        <!-- Native `readonly` already exposes the read-only state; no aria-readonly needed. -->
        <input id="headingShareInput" type="text" class="form-control font-monospace" readonly style="font-size: 0.85rem;">
        <button class="btn btn-primary hsm-copy" type="button" aria-label="Copy" title="Copy">
          <i class="fa-duotone fa-clipboard" aria-hidden="true"></i>
        </button>
      </div>
      <div class="small fw-bold mb-2 text-muted text-uppercase" style="font-size: 0.7rem; letter-spacing: 0.5px;">Share via</div>
      <div class="hsm-share-grid">
        <!-- The href of each link is assigned by script when the dialog opens. -->
        <a id="share-twitter" class="btn btn-outline-secondary w-100" target="_blank" rel="noopener noreferrer">
          <i class="fa-brands fa-twitter me-2" aria-hidden="true"></i>Twitter<span class="visually-hidden"> (opens in a new tab)</span>
        </a>
        <a id="share-linkedin" class="btn btn-outline-secondary w-100" target="_blank" rel="noopener noreferrer">
          <i class="fa-brands fa-linkedin me-2" aria-hidden="true"></i>LinkedIn<span class="visually-hidden"> (opens in a new tab)</span>
        </a>
        <a id="share-facebook" class="btn btn-outline-secondary w-100" target="_blank" rel="noopener noreferrer">
          <i class="fa-brands fa-facebook me-2" aria-hidden="true"></i>Facebook<span class="visually-hidden"> (opens in a new tab)</span>
        </a>
      </div>
    </div>
  </div>
</div>
<style>
/* ---- Overlay: full-viewport dimmed backdrop that centres the dialog ---- */
.heading-share-modal {
  position: fixed;
  inset: 0;
  display: flex;
  justify-content: center;
  align-items: center;
  background: rgba(0, 0, 0, 0.6);
  z-index: 1050;
  padding: 1rem;
  backdrop-filter: blur(4px);
  -webkit-backdrop-filter: blur(4px);
}
/* `display: flex` above would otherwise override the UA rule for [hidden]. */
.heading-share-modal[hidden] { display: none !important; }

/* ---- Dialog panel ---- */
.hsm-dialog {
  max-width: 420px;
  width: 100%;
  background: var(--bs-body-bg, #fff);
  color: var(--bs-body-color, #212529);
  border: 1px solid var(--bs-border-color, rgba(0,0,0,0.1));
  border-radius: 1rem;
  box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.25);
  overflow: hidden;
  animation: hsm-fade-in 0.2s ease-out;
}
@keyframes hsm-fade-in {
  from { opacity: 0; transform: scale(0.95); }
  to { opacity: 1; transform: scale(1); }
}
/* Respect the user's OS-level "reduce motion" setting: show the panel without the scale/fade. */
@media (prefers-reduced-motion: reduce) {
  .hsm-dialog { animation: none; }
}
[data-bs-theme="dark"] .hsm-dialog {
  background: #1e293b;
  border-color: rgba(255,255,255,0.1);
  color: #f8f9fa;
}

/* ---- Header row: title + close button ---- */
.hsm-header {
  display: flex;
  justify-content: space-between;
  align-items: center;
  padding: 1rem 1.5rem;
  border-bottom: 1px solid var(--bs-border-color, rgba(0,0,0,0.1));
  background: rgba(0,0,0,0.02);
}
[data-bs-theme="dark"] .hsm-header {
  background: rgba(255,255,255,0.02);
  border-color: rgba(255,255,255,0.1);
}
.hsm-close {
  background: transparent;
  border: none;
  color: inherit;
  opacity: 0.5;
  padding: 0.25rem 0.5rem;
  border-radius: 0.25rem;
  font-size: 1.2rem;
  line-height: 1;
  transition: opacity 0.2s;
}
/* Keyboard focus gets the same full-opacity affordance as pointer hover. */
.hsm-close:hover,
.hsm-close:focus-visible {
  opacity: 1;
}

/* ---- Body ---- */
.hsm-body {
  padding: 1.5rem;
}

/* ---- Permalink field + copy button, rendered as one joined control ---- */
.hsm-url-group {
  display: flex !important;
  align-items: stretch;
}
.hsm-url-group .form-control {
  flex: 1;
  min-width: 0;
  margin: 0;
  background: var(--bs-secondary-bg, #f8f9fa);
  border-color: var(--bs-border-color, #dee2e6);
  border-top-right-radius: 0;
  border-bottom-right-radius: 0;
  height: 42px;
}
.hsm-url-group .btn {
  flex: 0 0 auto;
  margin: 0;
  /* Overlap the input's right border so the seam is a single 1px line. */
  margin-left: -1px;
  border-top-left-radius: 0;
  border-bottom-left-radius: 0;
  height: 42px;
  display: flex;
  align-items: center;
  justify-content: center;
  padding: 0 1.25rem;
  z-index: 2;
}
[data-bs-theme="dark"] .hsm-url-group .form-control {
  background: #0f172a;
  border-color: #334155;
  color: #e2e8f0;
}

/* ---- "Share via" buttons, stacked vertically ---- */
.hsm-share-grid {
  display: flex;
  flex-direction: column;
  gap: 0.5rem;
}
.hsm-share-grid .btn {
  display: flex;
  align-items: center;
  justify-content: center;
  font-size: 0.9rem;
  padding: 0.6rem;
  border-color: var(--bs-border-color);
  width: 100%;
}
[data-bs-theme="dark"] .hsm-share-grid .btn {
  color: #e2e8f0;
  border-color: #475569;
}
[data-bs-theme="dark"] .hsm-share-grid .btn:hover {
  background: #334155;
  border-color: #cbd5e1;
}
</style>
<script>
(function(){
// The shared dialog element. Pages without the share modal have nothing to wire up, so bail out early.
const modal = document.getElementById('headingShareModal');
if(!modal) return;
// Controls inside the dialog. The copy button, social links and close button are
// null-checked where they are used; `input` is used without a null check.
const input = modal.querySelector('#headingShareInput');
const copyBtn = modal.querySelector('.hsm-copy');
const twitter = modal.querySelector('#share-twitter');
const linkedin = modal.querySelector('#share-linkedin');
const facebook = modal.querySelector('#share-facebook');
const closeBtn = modal.querySelector('.hsm-close');
// Element that was focused when the dialog was opened; closeModal() returns focus to it.
let lastFocus=null;
// Set to true once trapFocus() has attached its Tab-key listener, so it is attached only once.
let trapBound=false;
/**
 * Build the absolute permalink for a heading anchor on the current page.
 * Only the origin and pathname are used, so the current query string and
 * hash are not carried over.
 * @param {string} id - Heading element id, used verbatim as the fragment.
 * @returns {string} URL of the form `<origin><pathname>#<id>`.
 */
function buildUrl(id){
  const { origin, pathname } = window.location;
  return `${origin}${pathname}#${id}`;
}
/**
 * Report whether the share dialog is currently shown.
 * @returns {boolean} true when the modal carries no `hidden` attribute.
 */
function isOpen(){
  return modal.getAttribute('hidden') === null;
}
/**
 * Populate the dialog for one heading: write the permalink into the read-only
 * field and point each social link that exists at its share URL.
 * The page title is sent as the tweet text; LinkedIn and Facebook receive
 * only the encoded permalink.
 * @param {string} id - Heading element id to share.
 */
function hydrate(id){
  const permalink = buildUrl(id);
  input.value = permalink;

  const encodedUrl = encodeURIComponent(permalink);
  const encodedTitle = encodeURIComponent(document.title);

  // [anchor element (may be null), href to assign]
  const targets = [
    [twitter, `https://twitter.com/intent/tweet?url=${encodedUrl}&text=${encodedTitle}`],
    [linkedin, `https://www.linkedin.com/sharing/share-offsite/?url=${encodedUrl}`],
    [facebook, `https://www.facebook.com/sharer/sharer.php?u=${encodedUrl}`],
  ];
  for (const [anchor, href] of targets) {
    if (anchor) anchor.href = href;
  }
}
/**
 * Show the share dialog for the given heading.
 * Remembers the currently focused element (restored by closeModal), fills in
 * the URLs, un-hides the dialog if needed, moves focus to the permalink field
 * on the next animation frame, and makes sure the Tab trap is installed.
 * @param {string} id - Heading element id to share.
 */
function openModal(id){
  lastFocus = document.activeElement;
  hydrate(id);

  const currentlyHidden = modal.hasAttribute('hidden');
  if (currentlyHidden) {
    modal.removeAttribute('hidden');
  }

  // Focus is deferred to the next frame rather than applied synchronously.
  requestAnimationFrame(function focusPermalink(){
    input.focus();
  });
  trapFocus();
}
/**
 * Hide the share dialog and hand focus back to whatever was focused when it
 * was opened. Does nothing if the dialog is already hidden.
 */
function closeModal(){
  if (modal.hasAttribute('hidden')) return;
  modal.setAttribute('hidden', '');

  const returnTo = lastFocus;
  if (!returnTo || typeof returnTo.focus !== 'function') return;
  returnTo.focus();
}
/**
 * Copy the permalink field's value to the clipboard.
 * Tries the async Clipboard API first; if the call throws synchronously or
 * the returned promise rejects, the legacy fallback() path is used instead.
 */
function copyCurrent(){
  const onCopied = () => feedback(true);
  const onRejected = () => fallback();
  try {
    navigator.clipboard.writeText(input.value).then(onCopied, onRejected);
  } catch (err) {
    fallback();
  }
}
/**
 * Legacy copy path, used when the async Clipboard API is unavailable or rejects.
 * Selects the permalink field and issues the deprecated `copy` editing command.
 *
 * `document.execCommand` reports an unsupported or disabled command by
 * returning false rather than throwing, so the return value (not merely the
 * absence of an exception) decides whether the success or the error icon is shown.
 */
function fallback(){
  input.select();
  let copied = false;
  try {
    copied = Boolean(document.execCommand('copy'));
  } catch (e) {
    copied = false;
  }
  feedback(copied);
}
/**
 * Flash a result icon on the copy button, then restore the original icon
 * after 1.8 seconds.
 * The icon's original class list is cached in the button's `data-prev`
 * attribute on first use, so a second call made while a result icon is still
 * showing restores the original icon rather than the temporary one.
 * @param {boolean} ok - true shows the "copied" icon, false the error icon.
 */
function feedback(ok){
  const icon = copyBtn ? copyBtn.querySelector('i') : null;
  if (!icon) return;

  const cached = copyBtn.getAttribute('data-prev');
  const prev = cached || icon.className;
  if (!cached) {
    copyBtn.setAttribute('data-prev', prev);
  }

  let resultClass;
  if (ok) {
    resultClass = 'fa-duotone fa-clipboard-check';
  } else {
    resultClass = 'fa-duotone fa-circle-exclamation';
  }
  icon.className = resultClass;

  setTimeout(function restoreIcon(){
    icon.className = prev;
  }, 1800);
}
/**
 * Click handler for a heading's share button: opens the dialog for the
 * heading id stored in the button's `data-share-target` attribute.
 *
 * The handler is reached two ways: as a listener bound directly on a button,
 * and from a document-level delegate for buttons that were never bound. In
 * the delegated case `e.currentTarget` is the document, not the button, so
 * the button is resolved from the event target first and `currentTarget` is
 * only a fallback.
 * @param {Event} e - The click event.
 */
function handleShareClick(e){
  e.preventDefault();
  const origin = e.target;
  const fromTarget = origin && typeof origin.closest === 'function'
    ? origin.closest('.h-share')
    : null;
  const btn = fromTarget || e.currentTarget;
  // Neither route produced an element we can read the target id from.
  if (!btn || typeof btn.getAttribute !== 'function') return;
  const id = btn.getAttribute('data-share-target');
  if (id) openModal(id);
}
/**
 * Attach the share click handler to every `.h-share` button currently in the
 * document. Buttons are marked with `data-h-share-bound="1"` once bound, so
 * calling this repeatedly never attaches a second listener to the same button.
 */
function bindShareButtons(){
  for (const btn of document.querySelectorAll('.h-share')) {
    if (btn.dataset.hShareBound) continue;
    btn.addEventListener('click', handleShareClick);
    btn.dataset.hShareBound = '1';
  }
}
// Bind whatever share buttons already exist, then bind once more after the
// rest of the document is available: on DOMContentLoaded if the page is still
// parsing, otherwise on the next animation frame.
bindShareButtons();
if (document.readyState !== 'loading') {
  requestAnimationFrame(bindShareButtons);
} else {
  document.addEventListener('DOMContentLoaded', bindShareButtons);
}
// Capture-phase safety net for share buttons that never received a direct
// listener (no data-h-share-bound marker): route their clicks to
// handleShareClick, invoked with the button as `this`.
document.addEventListener('click', function delegateShareClick(evt){
  const origin = evt.target;
  const shareBtn = origin.closest ? origin.closest('.h-share') : null;
  if (!shareBtn || shareBtn.dataset.hShareBound) return;
  handleShareClick.call(shareBtn, evt);
}, true);
// Document-level click routing for the dialog:
//   - a click on the backdrop itself (the modal element, not its children) closes it;
//   - a click on or inside .hsm-close closes it;
//   - a click on or inside the copy button copies the permalink.
// The three checks are independent and evaluated in this order.
document.addEventListener('click', evt => {
  const target = evt.target;
  const within = selector => Boolean(target.closest && target.closest(selector));

  if (target === modal) {
    closeModal();
  }
  if (within('.hsm-close')) {
    evt.preventDefault();
    closeModal();
  }
  if (copyBtn && (target === copyBtn || within('.hsm-copy'))) {
    evt.preventDefault();
    copyCurrent();
  }
});
// Pressing Escape anywhere on the page closes the dialog while it is open.
document.addEventListener('keydown', function onEscape(evt){
  if (evt.key !== 'Escape' || !isOpen()) return;
  closeModal();
});
/**
 * Install (once) a Tab-key listener on the modal that keeps keyboard focus
 * cycling inside it: Tab on the last focusable control wraps to the first,
 * and Shift+Tab on the first wraps to the last. Disabled controls are skipped.
 * The focusable list is rebuilt on every Tab press.
 */
function trapFocus(){
  if (trapBound) return;
  trapBound = true;

  const FOCUSABLE = 'a[href],button,input,textarea,select,[tabindex]:not([tabindex="-1"])';

  modal.addEventListener('keydown', function cycleTab(evt){
    if (evt.key !== 'Tab' || !isOpen()) return;

    const stops = Array.from(modal.querySelectorAll(FOCUSABLE))
      .filter(el => !el.hasAttribute('disabled'));
    if (stops.length === 0) return;

    const first = stops[0];
    const last = stops[stops.length - 1];

    if (evt.shiftKey) {
      if (document.activeElement === first) {
        evt.preventDefault();
        last.focus();
      }
    } else if (document.activeElement === last) {
      evt.preventDefault();
      first.focus();
    }
  });
}
// Direct listener on the close button, when the dialog has one.
if (closeBtn) {
  closeBtn.addEventListener('click', function onCloseClick(evt){
    evt.preventDefault();
    closeModal();
  });
}
})();
</script><p>High availability (HA) ensures that Geode remains operational despite hardware failures, network issues, or planned maintenance. For mission-critical applications, downtime translates directly to lost revenue, damaged reputation, and user frustration. Geode provides comprehensive HA capabilities including automatic failover, data replication, and self-healing clusters.</p>
<p>This guide covers HA architecture, configuration, monitoring, and best practices for achieving enterprise-grade availability with Geode deployments.</p>
<h3 id="understanding-high-availability" class="position-relative d-flex align-items-center group">
<span>Understanding High Availability</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="understanding-high-availability"
aria-haspopup="dialog"
aria-label="Share link: Understanding High Availability">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3>
<h4 id="availability-metrics" class="position-relative d-flex align-items-center group">
<span>Availability Metrics</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="availability-metrics"
aria-haspopup="dialog"
aria-label="Share link: Availability Metrics">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p><strong>Uptime Percentage</strong>: The percentage of time the system is operational</p>
<table>
<thead>
<tr>
<th>Availability</th>
<th>Downtime/Year</th>
<th>Downtime/Month</th>
<th>Downtime/Week</th>
</tr>
</thead>
<tbody>
<tr>
<td>99% (two 9s)</td>
<td>3.65 days</td>
<td>7.3 hours</td>
<td>1.68 hours</td>
</tr>
<tr>
<td>99.9% (three 9s)</td>
<td>8.76 hours</td>
<td>43.8 minutes</td>
<td>10.1 minutes</td>
</tr>
<tr>
<td>99.99% (four 9s)</td>
<td>52.6 minutes</td>
<td>4.38 minutes</td>
<td>1.01 minutes</td>
</tr>
<tr>
<td>99.999% (five 9s)</td>
<td>5.26 minutes</td>
<td>26.3 seconds</td>
<td>6.05 seconds</td>
</tr>
</tbody>
</table>
<p><strong>Recovery Objectives</strong>:</p>
<ul>
<li><strong>RTO (Recovery Time Objective)</strong>: Maximum acceptable downtime</li>
<li><strong>RPO (Recovery Point Objective)</strong>: Maximum acceptable data loss</li>
</ul>
<p>Geode’s HA features target 99.99%+ availability with near-zero RPO for synchronous replication.</p>
<h4 id="ha-components" class="position-relative d-flex align-items-center group">
<span>HA Components</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="ha-components"
aria-haspopup="dialog"
aria-label="Share link: HA Components">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>A highly available Geode deployment requires:</p>
<ol>
<li><strong>Redundant Nodes</strong>: Multiple instances to survive failures</li>
<li><strong>Data Replication</strong>: Copies of data across nodes</li>
<li><strong>Automatic Failover</strong>: Seamless transition when nodes fail</li>
<li><strong>Health Monitoring</strong>: Detection of failures and degradation</li>
<li><strong>Load Balancing</strong>: Distribution of traffic across healthy nodes</li>
</ol>
<h3 id="ha-architecture-patterns" class="position-relative d-flex align-items-center group">
<span>HA Architecture Patterns</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="ha-architecture-patterns"
aria-haspopup="dialog"
aria-label="Share link: HA Architecture Patterns">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3>
<h4 id="active-passive-primary-standby" class="position-relative d-flex align-items-center group">
<span>Active-Passive (Primary-Standby)</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="active-passive-primary-standby"
aria-haspopup="dialog"
aria-label="Share link: Active-Passive (Primary-Standby)">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>One primary node handles all traffic; standby nodes remain synchronized for failover:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-fallback" data-lang="fallback"><span class="line"><span class="cl"> ┌─────────────┐
</span></span><span class="line"><span class="cl"> Clients ───────>│ Primary │
</span></span><span class="line"><span class="cl"> │ (Active) │
</span></span><span class="line"><span class="cl"> └──────┬──────┘
</span></span><span class="line"><span class="cl"> │ Replication
</span></span><span class="line"><span class="cl"> ┌────────────┼────────────┐
</span></span><span class="line"><span class="cl"> ▼ ▼ ▼
</span></span><span class="line"><span class="cl"> ┌──────────┐ ┌──────────┐ ┌──────────┐
</span></span><span class="line"><span class="cl"> │ Standby1 │ │ Standby2 │ │ Standby3 │
</span></span><span class="line"><span class="cl"> │(Passive) │ │(Passive) │ │(Passive) │
</span></span><span class="line"><span class="cl"> └──────────┘ └──────────┘ └──────────┘
</span></span></code></pre></div><p><strong>Configuration</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-toml" data-lang="toml"><span class="line"><span class="cl"><span class="c"># geode.toml - Primary node</span>
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">cluster</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">mode</span> <span class="p">=</span> <span class="s2">"replicated"</span>
</span></span><span class="line"><span class="cl"><span class="nx">role</span> <span class="p">=</span> <span class="s2">"primary"</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">replication</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">mode</span> <span class="p">=</span> <span class="s2">"sync"</span>
</span></span><span class="line"><span class="cl"><span class="nx">factor</span> <span class="p">=</span> <span class="mi">3</span>
</span></span><span class="line"><span class="cl"><span class="nx">standby_nodes</span> <span class="p">=</span> <span class="p">[</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"standby1.geode.internal:7687"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"standby2.geode.internal:7687"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"standby3.geode.internal:7687"</span>
</span></span><span class="line"><span class="cl"><span class="p">]</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">failover</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">enabled</span> <span class="p">=</span> <span class="kc">true</span>
</span></span><span class="line"><span class="cl"><span class="nx">promotion_strategy</span> <span class="p">=</span> <span class="s2">"automatic"</span>
</span></span><span class="line"><span class="cl"><span class="nx">min_sync_replicas</span> <span class="p">=</span> <span class="mi">1</span>
</span></span></code></pre></div><p><strong>Advantages</strong>: Simple, strong consistency</p>
<p><strong>Disadvantages</strong>: Standby resources underutilized</p>
<h4 id="active-active-multi-primary" class="position-relative d-flex align-items-center group">
<span>Active-Active (Multi-Primary)</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="active-active-multi-primary"
aria-haspopup="dialog"
aria-label="Share link: Active-Active (Multi-Primary)">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Multiple nodes handle traffic simultaneously with synchronization:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-fallback" data-lang="fallback"><span class="line"><span class="cl"> ┌──────────────────────────────┐
</span></span><span class="line"><span class="cl"> │ Load Balancer │
</span></span><span class="line"><span class="cl"> └──────────────┬───────────────┘
</span></span><span class="line"><span class="cl"> ┌────────────────────┼────────────────────┐
</span></span><span class="line"><span class="cl"> ▼ ▼ ▼
</span></span><span class="line"><span class="cl"> ┌──────────┐ ┌──────────┐ ┌──────────┐
</span></span><span class="line"><span class="cl"> │ Node 1 │◄───────►│ Node 2 │◄───────►│ Node 3 │
</span></span><span class="line"><span class="cl"> │ (Active) │ Sync │ (Active) │ Sync │ (Active) │
</span></span><span class="line"><span class="cl"> └──────────┘ └──────────┘ └──────────┘
</span></span></code></pre></div><p><strong>Configuration</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-toml" data-lang="toml"><span class="line"><span class="cl"><span class="c"># geode.toml - Active node</span>
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">cluster</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">mode</span> <span class="p">=</span> <span class="s2">"distributed"</span>
</span></span><span class="line"><span class="cl"><span class="nx">role</span> <span class="p">=</span> <span class="s2">"data"</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">cluster</span><span class="p">.</span><span class="nx">nodes</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">seeds</span> <span class="p">=</span> <span class="p">[</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"node1.geode.internal:7687"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"node2.geode.internal:7687"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"node3.geode.internal:7687"</span>
</span></span><span class="line"><span class="cl"><span class="p">]</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">replication</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">mode</span> <span class="p">=</span> <span class="s2">"sync"</span>
</span></span><span class="line"><span class="cl"><span class="nx">factor</span> <span class="p">=</span> <span class="mi">3</span>
</span></span><span class="line"><span class="cl"><span class="nx">read_preference</span> <span class="p">=</span> <span class="s2">"nearest"</span>
</span></span><span class="line"><span class="cl"><span class="nx">write_concern</span> <span class="p">=</span> <span class="s2">"majority"</span>
</span></span></code></pre></div><p><strong>Advantages</strong>: Better resource utilization, horizontal scaling</p>
<p><strong>Disadvantages</strong>: More complex, potential for conflicts</p>
<h4 id="geode-recommended-raft-based-clustering" class="position-relative d-flex align-items-center group">
<span>Geode Recommended: Raft-Based Clustering</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="geode-recommended-raft-based-clustering"
aria-haspopup="dialog"
aria-label="Share link: Geode Recommended: Raft-Based Clustering">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Geode uses Raft consensus for leader election and strong consistency:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-fallback" data-lang="fallback"><span class="line"><span class="cl"> ┌──────────────────────────────┐
</span></span><span class="line"><span class="cl"> │ Load Balancer │
</span></span><span class="line"><span class="cl"> └──────────────┬───────────────┘
</span></span><span class="line"><span class="cl"> ┌────────────────────┼────────────────────┐
</span></span><span class="line"><span class="cl"> ▼ ▼ ▼
</span></span><span class="line"><span class="cl"> ┌──────────┐ ┌──────────┐ ┌──────────┐
</span></span><span class="line"><span class="cl"> │ Node 1 │ │ Node 2 │ │ Node 3 │
</span></span><span class="line"><span class="cl"> │ (Leader) │────────►│(Follower)│ │(Follower)│
</span></span><span class="line"><span class="cl"> │ Writes │ │ Reads │ │ Reads │
</span></span><span class="line"><span class="cl"> └──────────┘ └──────────┘ └──────────┘
</span></span><span class="line"><span class="cl"> │ ▲ ▲
</span></span><span class="line"><span class="cl"> └────────────────────┴────────────────────┘
</span></span><span class="line"><span class="cl"> Log Replication
</span></span></code></pre></div><p><strong>Configuration</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-toml" data-lang="toml"><span class="line"><span class="cl"><span class="p">[</span><span class="nx">cluster</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">mode</span> <span class="p">=</span> <span class="s2">"distributed"</span>
</span></span><span class="line"><span class="cl"><span class="nx">consensus</span> <span class="p">=</span> <span class="s2">"raft"</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">cluster</span><span class="p">.</span><span class="nx">raft</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">election_timeout_ms</span> <span class="p">=</span> <span class="mi">1500</span>
</span></span><span class="line"><span class="cl"><span class="nx">heartbeat_interval_ms</span> <span class="p">=</span> <span class="mi">150</span>
</span></span><span class="line"><span class="cl"><span class="nx">snapshot_threshold</span> <span class="p">=</span> <span class="mi">10000</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">cluster</span><span class="p">.</span><span class="nx">nodes</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="c"># Odd number for majority consensus</span>
</span></span><span class="line"><span class="cl"><span class="nx">count</span> <span class="p">=</span> <span class="mi">3</span> <span class="c"># or 5 for higher fault tolerance</span>
</span></span></code></pre></div>
<h3 id="configuring-high-availability" class="position-relative d-flex align-items-center group">
<span>Configuring High Availability</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="configuring-high-availability"
aria-haspopup="dialog"
aria-label="Share link: Configuring High Availability">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3>
<h4 id="minimum-ha-cluster-3-nodes" class="position-relative d-flex align-items-center group">
<span>Minimum HA Cluster (3 Nodes)</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="minimum-ha-cluster-3-nodes"
aria-haspopup="dialog"
aria-label="Share link: Minimum HA Cluster (3 Nodes)">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>A three-node cluster tolerates one node failure:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-toml" data-lang="toml"><span class="line"><span class="cl"><span class="c"># geode.toml - Node 1</span>
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">server</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">node_id</span> <span class="p">=</span> <span class="s2">"node1"</span>
</span></span><span class="line"><span class="cl"><span class="nx">listen</span> <span class="p">=</span> <span class="s2">"0.0.0.0:3141"</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">cluster</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">mode</span> <span class="p">=</span> <span class="s2">"distributed"</span>
</span></span><span class="line"><span class="cl"><span class="nx">name</span> <span class="p">=</span> <span class="s2">"production"</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">cluster</span><span class="p">.</span><span class="nx">nodes</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">seeds</span> <span class="p">=</span> <span class="p">[</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"node1.geode.internal:7687"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"node2.geode.internal:7687"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"node3.geode.internal:7687"</span>
</span></span><span class="line"><span class="cl"><span class="p">]</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">replication</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">factor</span> <span class="p">=</span> <span class="mi">3</span>
</span></span><span class="line"><span class="cl"><span class="nx">mode</span> <span class="p">=</span> <span class="s2">"sync"</span>
</span></span><span class="line"><span class="cl"><span class="nx">ack_timeout_ms</span> <span class="p">=</span> <span class="mi">5000</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">failover</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">enabled</span> <span class="p">=</span> <span class="kc">true</span>
</span></span><span class="line"><span class="cl"><span class="nx">detection_interval_ms</span> <span class="p">=</span> <span class="mi">1000</span>
</span></span><span class="line"><span class="cl"><span class="nx">failure_threshold</span> <span class="p">=</span> <span class="mi">3</span>
</span></span><span class="line"><span class="cl"><span class="nx">promotion_delay_ms</span> <span class="p">=</span> <span class="mi">2000</span>
</span></span></code></pre></div>
<h4 id="enhanced-ha-cluster-5-nodes" class="position-relative d-flex align-items-center group">
<span>Enhanced HA Cluster (5 Nodes)</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="enhanced-ha-cluster-5-nodes"
aria-haspopup="dialog"
aria-label="Share link: Enhanced HA Cluster (5 Nodes)">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>A five-node cluster tolerates two simultaneous node failures:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-toml" data-lang="toml"><span class="line"><span class="cl"><span class="c"># geode.toml - 5-node cluster</span>
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">cluster</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">mode</span> <span class="p">=</span> <span class="s2">"distributed"</span>
</span></span><span class="line"><span class="cl"><span class="nx">name</span> <span class="p">=</span> <span class="s2">"production-ha"</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">cluster</span><span class="p">.</span><span class="nx">nodes</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">seeds</span> <span class="p">=</span> <span class="p">[</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"node1.geode.internal:7687"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"node2.geode.internal:7687"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"node3.geode.internal:7687"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"node4.geode.internal:7687"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"node5.geode.internal:7687"</span>
</span></span><span class="line"><span class="cl"><span class="p">]</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c"># With 5 nodes, can lose 2 and maintain majority (3)</span>
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">replication</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">factor</span> <span class="p">=</span> <span class="mi">3</span>
</span></span><span class="line"><span class="cl"><span class="nx">mode</span> <span class="p">=</span> <span class="s2">"sync"</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">cluster</span><span class="p">.</span><span class="nx">placement</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="c"># Spread across availability zones</span>
</span></span><span class="line"><span class="cl"><span class="nx">strategy</span> <span class="p">=</span> <span class="s2">"zone-aware"</span>
</span></span><span class="line"><span class="cl"><span class="nx">zones</span> <span class="p">=</span> <span class="p">[</span><span class="s2">"us-east-1a"</span><span class="p">,</span> <span class="s2">"us-east-1b"</span><span class="p">,</span> <span class="s2">"us-east-1c"</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">min_zones_for_write</span> <span class="p">=</span> <span class="mi">2</span>
</span></span></code></pre></div>
<h4 id="geographic-distribution" class="position-relative d-flex align-items-center group">
<span>Geographic Distribution</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="geographic-distribution"
aria-haspopup="dialog"
aria-label="Share link: Geographic Distribution">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Deploy across data centers for disaster resilience:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-toml" data-lang="toml"><span class="line"><span class="cl"><span class="c"># Multi-region configuration</span>
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">cluster</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">mode</span> <span class="p">=</span> <span class="s2">"geo-distributed"</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">cluster</span><span class="p">.</span><span class="nx">regions</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">primary</span> <span class="p">=</span> <span class="s2">"us-east"</span>
</span></span><span class="line"><span class="cl"><span class="nx">secondary</span> <span class="p">=</span> <span class="p">[</span><span class="s2">"us-west"</span><span class="p">,</span> <span class="s2">"eu-west"</span><span class="p">]</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">cluster</span><span class="p">.</span><span class="nx">region</span><span class="p">.</span><span class="nx">us-east</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">nodes</span> <span class="p">=</span> <span class="p">[</span><span class="s2">"node1-east"</span><span class="p">,</span> <span class="s2">"node2-east"</span><span class="p">,</span> <span class="s2">"node3-east"</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">priority</span> <span class="p">=</span> <span class="mi">1</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">cluster</span><span class="p">.</span><span class="nx">region</span><span class="p">.</span><span class="nx">us-west</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">nodes</span> <span class="p">=</span> <span class="p">[</span><span class="s2">"node1-west"</span><span class="p">,</span> <span class="s2">"node2-west"</span><span class="p">,</span> <span class="s2">"node3-west"</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">priority</span> <span class="p">=</span> <span class="mi">2</span>
</span></span><span class="line"><span class="cl"><span class="nx">replication_mode</span> <span class="p">=</span> <span class="s2">"async"</span>
</span></span><span class="line"><span class="cl"><span class="nx">max_lag_ms</span> <span class="p">=</span> <span class="mi">1000</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">cluster</span><span class="p">.</span><span class="nx">region</span><span class="p">.</span><span class="nx">eu-west</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">nodes</span> <span class="p">=</span> <span class="p">[</span><span class="s2">"node1-eu"</span><span class="p">,</span> <span class="s2">"node2-eu"</span><span class="p">,</span> <span class="s2">"node3-eu"</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">priority</span> <span class="p">=</span> <span class="mi">3</span>
</span></span><span class="line"><span class="cl"><span class="nx">replication_mode</span> <span class="p">=</span> <span class="s2">"async"</span>
</span></span><span class="line"><span class="cl"><span class="nx">max_lag_ms</span> <span class="p">=</span> <span class="mi">5000</span>
</span></span></code></pre></div>
<h3 id="automatic-failover" class="position-relative d-flex align-items-center group">
<span>Automatic Failover</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="automatic-failover"
aria-haspopup="dialog"
aria-label="Share link: Automatic Failover">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3>
<h4 id="failure-detection" class="position-relative d-flex align-items-center group">
<span>Failure Detection</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="failure-detection"
aria-haspopup="dialog"
aria-label="Share link: Failure Detection">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Geode detects failures through multiple mechanisms:</p>
<p><strong>Heartbeat Monitoring</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-toml" data-lang="toml"><span class="line"><span class="cl"><span class="p">[</span><span class="nx">health</span><span class="p">.</span><span class="nx">heartbeat</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">interval_ms</span> <span class="p">=</span> <span class="mi">100</span>
</span></span><span class="line"><span class="cl"><span class="nx">timeout_ms</span> <span class="p">=</span> <span class="mi">500</span>
</span></span><span class="line"><span class="cl"><span class="nx">failure_count</span> <span class="p">=</span> <span class="mi">3</span> <span class="c"># 3 missed = failure</span>
</span></span></code></pre></div><p><strong>TCP Connection Health</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-toml" data-lang="toml"><span class="line"><span class="cl"><span class="p">[</span><span class="nx">health</span><span class="p">.</span><span class="nx">connection</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">keepalive_interval_ms</span> <span class="p">=</span> <span class="mi">10000</span>
</span></span><span class="line"><span class="cl"><span class="nx">keepalive_probes</span> <span class="p">=</span> <span class="mi">3</span>
</span></span><span class="line"><span class="cl"><span class="nx">keepalive_timeout_ms</span> <span class="p">=</span> <span class="mi">5000</span>
</span></span></code></pre></div><p><strong>Application-Level Health Checks</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-toml" data-lang="toml"><span class="line"><span class="cl"><span class="p">[</span><span class="nx">health</span><span class="p">.</span><span class="nx">checks</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">enabled</span> <span class="p">=</span> <span class="kc">true</span>
</span></span><span class="line"><span class="cl"><span class="nx">interval_ms</span> <span class="p">=</span> <span class="mi">5000</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">health</span><span class="p">.</span><span class="nx">checks</span><span class="p">.</span><span class="nx">storage</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">type</span> <span class="p">=</span> <span class="s2">"write_test"</span>
</span></span><span class="line"><span class="cl"><span class="nx">timeout_ms</span> <span class="p">=</span> <span class="mi">1000</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">health</span><span class="p">.</span><span class="nx">checks</span><span class="p">.</span><span class="nx">memory</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">type</span> <span class="p">=</span> <span class="s2">"threshold"</span>
</span></span><span class="line"><span class="cl"><span class="nx">max_used_percent</span> <span class="p">=</span> <span class="mi">90</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">health</span><span class="p">.</span><span class="nx">checks</span><span class="p">.</span><span class="nx">disk</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">type</span> <span class="p">=</span> <span class="s2">"threshold"</span>
</span></span><span class="line"><span class="cl"><span class="nx">min_free_percent</span> <span class="p">=</span> <span class="mi">10</span>
</span></span></code></pre></div>
<h4 id="failover-process" class="position-relative d-flex align-items-center group">
<span>Failover Process</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="failover-process"
aria-haspopup="dialog"
aria-label="Share link: Failover Process">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>When a node failure is detected:</p>
<ol>
<li><strong>Detection</strong>: Health check fails or heartbeat times out</li>
<li><strong>Verification</strong>: Confirm failure from multiple observers</li>
<li><strong>Leader Election</strong>: Raft elects new leader if needed</li>
<li><strong>Promotion</strong>: Replicas promoted to primary for affected shards</li>
<li><strong>Client Redirect</strong>: Clients automatically reconnect</li>
<li><strong>Recovery</strong>: System rebalances when node returns</li>
</ol>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Monitor</span><span class="w"> </span><span class="py">failover</span><span class="w"> </span><span class="py">events</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">SELECT</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">timestamp</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">event_type</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">source_node</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">target_node</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">duration_ms</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">data_loss_bytes</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">FROM</span><span class="w"> </span><span class="py">system</span><span class="err">.</span><span class="py">failover_log</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">timestamp</span><span class="w"> </span><span class="py">DESC</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">LIMIT</span><span class="w"> </span><span class="py">20</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h4 id="failover-configuration" class="position-relative d-flex align-items-center group">
<span>Failover Configuration</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="failover-configuration"
aria-haspopup="dialog"
aria-label="Share link: Failover Configuration">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-toml" data-lang="toml"><span class="line"><span class="cl"><span class="p">[</span><span class="nx">failover</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">enabled</span> <span class="p">=</span> <span class="kc">true</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c"># Detection settings</span>
</span></span><span class="line"><span class="cl"><span class="nx">detection_method</span> <span class="p">=</span> <span class="s2">"consensus"</span> <span class="c"># heartbeat, consensus, or both</span>
</span></span><span class="line"><span class="cl"><span class="nx">min_observers</span> <span class="p">=</span> <span class="mi">2</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c"># Timing</span>
</span></span><span class="line"><span class="cl"><span class="nx">detection_timeout_ms</span> <span class="p">=</span> <span class="mi">3000</span>
</span></span><span class="line"><span class="cl"><span class="nx">promotion_delay_ms</span> <span class="p">=</span> <span class="mi">1000</span>
</span></span><span class="line"><span class="cl"><span class="nx">client_redirect_timeout_ms</span> <span class="p">=</span> <span class="mi">5000</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c"># Behavior</span>
</span></span><span class="line"><span class="cl"><span class="nx">auto_promote</span> <span class="p">=</span> <span class="kc">true</span>
</span></span><span class="line"><span class="cl"><span class="nx">prefer_sync_replica</span> <span class="p">=</span> <span class="kc">true</span>
</span></span><span class="line"><span class="cl"><span class="nx">block_writes_during_failover</span> <span class="p">=</span> <span class="kc">false</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c"># Recovery</span>
</span></span><span class="line"><span class="cl"><span class="nx">auto_rejoin</span> <span class="p">=</span> <span class="kc">true</span>
</span></span><span class="line"><span class="cl"><span class="nx">rejoin_as</span> <span class="p">=</span> <span class="s2">"replica"</span> <span class="c"># replica or standby</span>
</span></span><span class="line"><span class="cl"><span class="nx">catch_up_mode</span> <span class="p">=</span> <span class="s2">"streaming"</span>
</span></span></code></pre></div>
<h4 id="client-failover-handling" class="position-relative d-flex align-items-center group">
<span>Client Failover Handling</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="client-failover-handling"
aria-haspopup="dialog"
aria-label="Share link: Client Failover Handling">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p><strong>Python Client</strong>:</p>
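<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl"><span class="kn">import</span> <span class="nn">asyncio</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="kn">from</span> <span class="nn">geode_client</span> <span class="kn">import</span> <span class="n">Client</span><span class="p">,</span> <span class="n">FailoverConfig</span><span class="p">,</span> <span class="n">FailoverInProgressError</span>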
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c1"># Configure client for HA</span>
</span></span><span class="line"><span class="cl"><span class="n">client</span> <span class="o">=</span> <span class="n">Client</span><span class="p">(</span>
</span></span><span class="line"><span class="cl"> <span class="n">hosts</span><span class="o">=</span><span class="p">[</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"node1.geode.internal:3141"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"node2.geode.internal:3141"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"node3.geode.internal:3141"</span>
</span></span><span class="line"><span class="cl"> <span class="p">],</span>
</span></span><span class="line"><span class="cl"> <span class="n">failover</span><span class="o">=</span><span class="n">FailoverConfig</span><span class="p">(</span>
</span></span><span class="line"><span class="cl"> <span class="n">enabled</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="n">retry_attempts</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="n">retry_delay_ms</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="n">circuit_breaker_threshold</span><span class="o">=</span><span class="mi">5</span>
</span></span><span class="line"><span class="cl"> <span class="p">)</span>
</span></span><span class="line"><span class="cl"><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="k">async</span> <span class="k">def</span> <span class="nf">resilient_query</span><span class="p">():</span>
</span></span><span class="line"><span class="cl"> <span class="k">async</span> <span class="k">with</span> <span class="n">client</span><span class="o">.</span><span class="n">connection</span><span class="p">()</span> <span class="k">as</span> <span class="n">conn</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="k">try</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="c1"># Automatic retry on failover</span>
</span></span><span class="line"><span class="cl"> <span class="n">result</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="k">await</span> <span class="n">conn</span><span class="o">.</span><span class="n">query</span><span class="p">(</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"MATCH (u:User {id: $id}) RETURN u"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="p">{</span><span class="s2">"id"</span><span class="p">:</span> <span class="s2">"user-123"</span><span class="p">}</span>
</span></span><span class="line"><span class="cl"> <span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="k">return</span> <span class="n">result</span><span class="o">.</span><span class="n">rows</span>
</span></span><span class="line"><span class="cl"> <span class="k">except</span> <span class="n">FailoverInProgressError</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="c1"># Wait for failover to complete</span>
</span></span><span class="line"><span class="cl"> <span class="k">await</span> <span class="n">asyncio</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="k">return</span> <span class="k">await</span> <span class="n">resilient_query</span><span class="p">()</span>
</span></span></code></pre></div><p><strong>Go Client</strong>:</p>
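<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-go" data-lang="go"><span class="line"><span class="cl"><span class="kn">import</span> <span class="p">(</span>
</span></span><span class="line"><span class="cl"> <span class="s">"database/sql"</span>
</span></span><span class="line"><span class="cl"> <span class="s">"log"</span>
</span></span><span class="line"><span class="cl"> <span class="s">"time"</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="nx">_</span> <span class="s">"geodedb.com/geode"</span>
</span></span><span class="line"><span class="cl"><span class="p">)</span>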
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="kd">func</span> <span class="nf">main</span><span class="p">()</span> <span class="p">{</span>
</span></span><span class="line"><span class="cl"> <span class="c1">// Connection string with multiple hosts
</span></span></span><span class="line"><span class="cl"><span class="c1"></span> <span class="nx">dsn</span> <span class="o">:=</span> <span class="s">"quic://node1:3141,node2:3141,node3:3141?failover=true"</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="nx">db</span><span class="p">,</span> <span class="nx">err</span> <span class="o">:=</span> <span class="nx">sql</span><span class="p">.</span><span class="nf">Open</span><span class="p">(</span><span class="s">"geode"</span><span class="p">,</span> <span class="nx">dsn</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="k">if</span> <span class="nx">err</span> <span class="o">!=</span> <span class="kc">nil</span> <span class="p">{</span>
</span></span><span class="line"><span class="cl"> <span class="nx">log</span><span class="p">.</span><span class="nf">Fatal</span><span class="p">(</span><span class="nx">err</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="p">}</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="c1">// Configure connection pool for HA
</span></span></span><span class="line"><span class="cl"><span class="c1"></span> <span class="nx">db</span><span class="p">.</span><span class="nf">SetMaxOpenConns</span><span class="p">(</span><span class="mi">50</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="nx">db</span><span class="p">.</span><span class="nf">SetMaxIdleConns</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="nx">db</span><span class="p">.</span><span class="nf">SetConnMaxLifetime</span><span class="p">(</span><span class="mi">5</span> <span class="o">*</span> <span class="nx">time</span><span class="p">.</span><span class="nx">Minute</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="c1">// Queries automatically retry on failover
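</span></span></span><span class="line"><span class="cl"><span class="c1"></span> <span class="nx">rows</span><span class="p">,</span> <span class="nx">err</span> <span class="o">:=</span> <span class="nx">db</span><span class="p">.</span><span class="nf">Query</span><span class="p">(</span><span class="s">"MATCH (u:User) RETURN u.name"</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="k">if</span> <span class="nx">err</span> <span class="o">!=</span> <span class="kc">nil</span> <span class="p">{</span>
</span></span><span class="line"><span class="cl"> <span class="nx">log</span><span class="p">.</span><span class="nf">Fatal</span><span class="p">(</span><span class="nx">err</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="p">}</span>
</span></span><span class="line"><span class="cl"> <span class="k">defer</span> <span class="nx">rows</span><span class="p">.</span><span class="nf">Close</span><span class="p">()</span>
</span></span><span class="line"><span class="cl"><span class="p">}</span>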
</span></span></code></pre></div>
<h3 id="load-balancing" class="position-relative d-flex align-items-center group">
<span>Load Balancing</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="load-balancing"
aria-haspopup="dialog"
aria-label="Share link: Load Balancing">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3>
<h4 id="internal-load-balancing" class="position-relative d-flex align-items-center group">
<span>Internal Load Balancing</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="internal-load-balancing"
aria-haspopup="dialog"
aria-label="Share link: Internal Load Balancing">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Geode’s query coordinators distribute load across data nodes:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-toml" data-lang="toml"><span class="line"><span class="cl"><span class="p">[</span><span class="nx">load_balancing</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">enabled</span> <span class="p">=</span> <span class="kc">true</span>
</span></span><span class="line"><span class="cl"><span class="nx">algorithm</span> <span class="p">=</span> <span class="s2">"least_connections"</span> <span class="c"># round_robin, least_connections, weighted</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">load_balancing</span><span class="p">.</span><span class="nx">weights</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="c"># Higher weight = more traffic</span>
</span></span><span class="line"><span class="cl"><span class="nx">node1</span> <span class="p">=</span> <span class="mi">100</span>
</span></span><span class="line"><span class="cl"><span class="nx">node2</span> <span class="p">=</span> <span class="mi">100</span>
</span></span><span class="line"><span class="cl"><span class="nx">node3</span> <span class="p">=</span> <span class="mi">50</span> <span class="c"># Smaller instance</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">load_balancing</span><span class="p">.</span><span class="nx">health</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="c"># Remove unhealthy nodes from rotation</span>
</span></span><span class="line"><span class="cl"><span class="nx">check_interval_ms</span> <span class="p">=</span> <span class="mi">5000</span>
</span></span><span class="line"><span class="cl"><span class="nx">unhealthy_threshold</span> <span class="p">=</span> <span class="mi">3</span>
</span></span><span class="line"><span class="cl"><span class="nx">healthy_threshold</span> <span class="p">=</span> <span class="mi">2</span>
</span></span></code></pre></div>
<h4 id="external-load-balancer-configuration" class="position-relative d-flex align-items-center group">
<span>External Load Balancer Configuration</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="external-load-balancer-configuration"
aria-haspopup="dialog"
aria-label="Share link: External Load Balancer Configuration">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p><strong>HAProxy Example</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-fallback" data-lang="fallback"><span class="line"><span class="cl">frontend geode_frontend
</span></span><span class="line"><span class="cl"> bind *:3141
</span></span><span class="line"><span class="cl"> mode tcp
</span></span><span class="line"><span class="cl"> default_backend geode_backend
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl">backend geode_backend
</span></span><span class="line"><span class="cl"> mode tcp
</span></span><span class="line"><span class="cl"> balance leastconn
</span></span><span class="line"><span class="cl"> option tcp-check
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> server node1 node1.geode.internal:3141 check inter 1s fall 3 rise 2
</span></span><span class="line"><span class="cl"> server node2 node2.geode.internal:3141 check inter 1s fall 3 rise 2
</span></span><span class="line"><span class="cl"> server node3 node3.geode.internal:3141 check inter 1s fall 3 rise 2
</span></span></code></pre></div><p><strong>Kubernetes Service</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-yaml" data-lang="yaml"><span class="line"><span class="cl"><span class="nt">apiVersion</span><span class="p">:</span><span class="w"> </span><span class="l">v1</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="nt">kind</span><span class="p">:</span><span class="w"> </span><span class="l">Service</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="nt">metadata</span><span class="p">:</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l">geode-lb</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="nt">spec</span><span class="p">:</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l">LoadBalancer</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">ports</span><span class="p">:</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span>- <span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="m">3141</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">targetPort</span><span class="p">:</span><span class="w"> </span><span class="m">3141</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">protocol</span><span class="p">:</span><span class="w"> </span><span class="l">TCP</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">selector</span><span class="p">:</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">app</span><span class="p">:</span><span class="w"> </span><span class="l">geode</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">sessionAffinity</span><span class="p">:</span><span class="w"> </span><span class="l">ClientIP</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">sessionAffinityConfig</span><span class="p">:</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">clientIP</span><span class="p">:</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">timeoutSeconds</span><span class="p">:</span><span class="w"> </span><span class="m">3600</span><span class="w">
</span></span></span></code></pre></div>
<h4 id="readwrite-splitting" class="position-relative d-flex align-items-center group">
<span>Read/Write Splitting</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="readwrite-splitting"
aria-haspopup="dialog"
aria-label="Share link: Read/Write Splitting">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Route reads to replicas, writes to primary:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-toml" data-lang="toml"><span class="line"><span class="cl"><span class="p">[</span><span class="nx">routing</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="nx">write_to</span> <span class="p">=</span> <span class="s2">"primary"</span>
</span></span><span class="line"><span class="cl"><span class="nx">read_from</span> <span class="p">=</span> <span class="s2">"nearest"</span> <span class="c"># primary, replica, or nearest</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="p">[</span><span class="nx">routing</span><span class="p">.</span><span class="nx">read_preference</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"><span class="c"># Prefer local replica, fall back to primary</span>
</span></span><span class="line"><span class="cl"><span class="nx">strategy</span> <span class="p">=</span> <span class="s2">"nearest"</span>
</span></span><span class="line"><span class="cl"><span class="nx">max_staleness_ms</span> <span class="p">=</span> <span class="mi">100</span>
</span></span></code></pre></div><p><strong>Client-Side Routing</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl"><span class="kn">from</span> <span class="nn">geode_client</span> <span class="kn">import</span> <span class="n">Client</span><span class="p">,</span> <span class="n">ReadPreference</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="n">client</span> <span class="o">=</span> <span class="n">Client</span><span class="p">(</span><span class="n">hosts</span><span class="o">=</span><span class="p">[</span><span class="s2">"node1:3141"</span><span class="p">,</span> <span class="s2">"node2:3141"</span><span class="p">,</span> <span class="s2">"node3:3141"</span><span class="p">])</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="k">async</span> <span class="k">def</span> <span class="nf">read_user</span><span class="p">(</span><span class="n">user_id</span><span class="p">):</span>
</span></span><span class="line"><span class="cl"> <span class="k">async</span> <span class="k">with</span> <span class="n">client</span><span class="o">.</span><span class="n">connection</span><span class="p">(</span><span class="n">read_preference</span><span class="o">=</span><span class="n">ReadPreference</span><span class="o">.</span><span class="n">NEAREST</span><span class="p">)</span> <span class="k">as</span> <span class="n">conn</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="n">result</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="k">await</span> <span class="n">conn</span><span class="o">.</span><span class="n">query</span><span class="p">(</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"MATCH (u:User {id: $id}) RETURN u"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="p">{</span><span class="s2">"id"</span><span class="p">:</span> <span class="n">user_id</span><span class="p">}</span>
</span></span><span class="line"><span class="cl"> <span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="k">return</span> <span class="n">result</span><span class="o">.</span><span class="n">rows</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="k">async</span> <span class="k">def</span> <span class="nf">update_user</span><span class="p">(</span><span class="n">user_id</span><span class="p">,</span> <span class="n">name</span><span class="p">):</span>
</span></span><span class="line"><span class="cl"> <span class="k">async</span> <span class="k">with</span> <span class="n">client</span><span class="o">.</span><span class="n">connection</span><span class="p">(</span><span class="n">read_preference</span><span class="o">=</span><span class="n">ReadPreference</span><span class="o">.</span><span class="n">PRIMARY</span><span class="p">)</span> <span class="k">as</span> <span class="n">conn</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="k">await</span> <span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"MATCH (u:User {id: $id}) SET u.name = $name"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="p">{</span><span class="s2">"id"</span><span class="p">:</span> <span class="n">user_id</span><span class="p">,</span> <span class="s2">"name"</span><span class="p">:</span> <span class="n">name</span><span class="p">}</span>
</span></span><span class="line"><span class="cl"> <span class="p">)</span>
</span></span></code></pre></div>
<h3 id="monitoring-high-availability" class="position-relative d-flex align-items-center group">
<span>Monitoring High Availability</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="monitoring-high-availability"
aria-haspopup="dialog"
aria-label="Share link: Monitoring High Availability">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3>
<h4 id="key-ha-metrics" class="position-relative d-flex align-items-center group">
<span>Key HA Metrics</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="key-ha-metrics"
aria-haspopup="dialog"
aria-label="Share link: Key HA Metrics">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-bash" data-lang="bash"><span class="line"><span class="cl"><span class="c1"># Prometheus metrics for HA monitoring</span>
</span></span><span class="line"><span class="cl">curl http://node1:3141/metrics <span class="p">|</span> grep -E <span class="s2">"geode_cluster|geode_replication|geode_failover"</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c1"># Example output</span>
</span></span><span class="line"><span class="cl">geode_cluster_nodes_total<span class="o">{</span><span class="nv">status</span><span class="o">=</span><span class="s2">"healthy"</span><span class="o">}</span> <span class="m">3</span>
</span></span><span class="line"><span class="cl">geode_cluster_nodes_total<span class="o">{</span><span class="nv">status</span><span class="o">=</span><span class="s2">"unhealthy"</span><span class="o">}</span> <span class="m">0</span>
</span></span><span class="line"><span class="cl">geode_cluster_leader_node<span class="o">{</span><span class="nv">node</span><span class="o">=</span><span class="s2">"node1"</span><span class="o">}</span> <span class="m">1</span>
</span></span><span class="line"><span class="cl">geode_replication_lag_seconds<span class="o">{</span><span class="nv">shard</span><span class="o">=</span><span class="s2">"1"</span>,replica<span class="o">=</span><span class="s2">"node2"</span><span class="o">}</span> 0.005
</span></span><span class="line"><span class="cl">geode_replication_lag_seconds<span class="o">{</span><span class="nv">shard</span><span class="o">=</span><span class="s2">"1"</span>,replica<span class="o">=</span><span class="s2">"node3"</span><span class="o">}</span> 0.008
</span></span><span class="line"><span class="cl">geode_failover_events_total<span class="o">{</span><span class="nv">type</span><span class="o">=</span><span class="s2">"automatic"</span><span class="o">}</span> <span class="m">2</span>
</span></span><span class="line"><span class="cl">geode_failover_duration_seconds_sum 3.45
</span></span></code></pre></div>
<h4 id="health-check-endpoints" class="position-relative d-flex align-items-center group">
<span>Health Check Endpoints</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="health-check-endpoints"
aria-haspopup="dialog"
aria-label="Share link: Health Check Endpoints">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-bash" data-lang="bash"><span class="line"><span class="cl"><span class="c1"># Liveness probe - is the process running?</span>
</span></span><span class="line"><span class="cl">curl http://node1:3141/health/live
</span></span><span class="line"><span class="cl"><span class="c1"># Response: {"status": "ok", "uptime_seconds": 86400}</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c1"># Readiness probe - can it serve traffic?</span>
</span></span><span class="line"><span class="cl">curl http://node1:3141/health/ready
</span></span><span class="line"><span class="cl"><span class="c1"># Response: {"status": "ready", "role": "leader", "replicas_synced": 2}</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c1"># Cluster health - overall cluster status</span>
</span></span><span class="line"><span class="cl">curl http://node1:3141/health/cluster
</span></span><span class="line"><span class="cl"><span class="c1"># Response: {</span>
</span></span><span class="line"><span class="cl"><span class="c1"># "status": "healthy",</span>
</span></span><span class="line"><span class="cl"><span class="c1"># "nodes": {"total": 3, "healthy": 3, "unhealthy": 0},</span>
</span></span><span class="line"><span class="cl"><span class="c1"># "replication": {"in_sync": true, "max_lag_ms": 12}</span>
</span></span><span class="line"><span class="cl"><span class="c1"># }</span>
</span></span></code></pre></div>
<h4 id="alerting-rules" class="position-relative d-flex align-items-center group">
<span>Alerting Rules</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="alerting-rules"
aria-haspopup="dialog"
aria-label="Share link: Alerting Rules">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-yaml" data-lang="yaml"><span class="line"><span class="cl"><span class="c"># Prometheus alerting rules for HA</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="nt">groups</span><span class="p">:</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span>- <span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l">geode_ha_alerts</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">rules</span><span class="p">:</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span>- <span class="nt">alert</span><span class="p">:</span><span class="w"> </span><span class="l">GeodeNodeDown</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">expr</span><span class="p">:</span><span class="w"> </span><span class="l">geode_cluster_nodes_total{status="unhealthy"} > 0</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">for</span><span class="p">:</span><span class="w"> </span><span class="l">30s</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">labels</span><span class="p">:</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">severity</span><span class="p">:</span><span class="w"> </span><span class="l">critical</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">annotations</span><span class="p">:</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">summary</span><span class="p">:</span><span class="w"> </span><span class="s2">"Geode cluster has unhealthy nodes"</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s2">"{{ $value }} nodes are unhealthy"</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span>- <span class="nt">alert</span><span class="p">:</span><span class="w"> </span><span class="l">GeodeReplicationLagHigh</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">expr</span><span class="p">:</span><span class="w"> </span><span class="l">geode_replication_lag_seconds > 1</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">for</span><span class="p">:</span><span class="w"> </span><span class="l">1m</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">labels</span><span class="p">:</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">severity</span><span class="p">:</span><span class="w"> </span><span class="l">warning</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">annotations</span><span class="p">:</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">summary</span><span class="p">:</span><span class="w"> </span><span class="s2">"High replication lag detected"</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s2">"Replication lag is {{ $value }}s"</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span>- <span class="nt">alert</span><span class="p">:</span><span class="w"> </span><span class="l">GeodeNoQuorum</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">expr</span><span class="p">:</span><span class="w"> </span><span class="l">geode_cluster_nodes_total{status="healthy"} &lt; 2</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">for</span><span class="p">:</span><span class="w"> </span><span class="l">10s</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">labels</span><span class="p">:</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">severity</span><span class="p">:</span><span class="w"> </span><span class="l">critical</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">annotations</span><span class="p">:</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">summary</span><span class="p">:</span><span class="w"> </span><span class="s2">"Geode cluster lost quorum"</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s2">"Only {{ $value }} healthy nodes remain"</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span>- <span class="nt">alert</span><span class="p">:</span><span class="w"> </span><span class="l">GeodeFailoverFrequent</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">expr</span><span class="p">:</span><span class="w"> </span><span class="l">increase(geode_failover_events_total[1h]) > 3</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">for</span><span class="p">:</span><span class="w"> </span><span class="l">5m</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">labels</span><span class="p">:</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">severity</span><span class="p">:</span><span class="w"> </span><span class="l">warning</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">annotations</span><span class="p">:</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">summary</span><span class="p">:</span><span class="w"> </span><span class="s2">"Frequent failovers detected"</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="s2">"{{ $value }} failovers in the last hour"</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span>- <span class="nt">alert</span><span class="p">:</span><span class="w"> </span><span class="l">GeodeLeaderElectionStuck</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">expr</span><span class="p">:</span><span class="w"> </span><span class="l">geode_cluster_leader_election_in_progress == 1</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">for</span><span class="p">:</span><span class="w"> </span><span class="l">30s</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">labels</span><span class="p">:</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">severity</span><span class="p">:</span><span class="w"> </span><span class="l">critical</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">annotations</span><span class="p">:</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="nt">summary</span><span class="p">:</span><span class="w"> </span><span class="s2">"Leader election taking too long"</span><span class="w">
</span></span></span></code></pre></div>
<h4 id="grafana-dashboard" class="position-relative d-flex align-items-center group">
<span>Grafana Dashboard</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="grafana-dashboard"
aria-haspopup="dialog"
aria-label="Share link: Grafana Dashboard">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-json" data-lang="json"><span class="line"><span class="cl"><span class="p">{</span>
</span></span><span class="line"><span class="cl"> <span class="nt">"dashboard"</span><span class="p">:</span> <span class="p">{</span>
</span></span><span class="line"><span class="cl"> <span class="nt">"title"</span><span class="p">:</span> <span class="s2">"Geode High Availability"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="nt">"panels"</span><span class="p">:</span> <span class="p">[</span>
</span></span><span class="line"><span class="cl"> <span class="p">{</span>
</span></span><span class="line"><span class="cl"> <span class="nt">"title"</span><span class="p">:</span> <span class="s2">"Cluster Health"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="nt">"type"</span><span class="p">:</span> <span class="s2">"stat"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="nt">"targets"</span><span class="p">:</span> <span class="p">[{</span>
</span></span><span class="line"><span class="cl"> <span class="nt">"expr"</span><span class="p">:</span> <span class="s2">"geode_cluster_nodes_total{status='healthy'}"</span>
</span></span><span class="line"><span class="cl"> <span class="p">}]</span>
</span></span><span class="line"><span class="cl"> <span class="p">},</span>
</span></span><span class="line"><span class="cl"> <span class="p">{</span>
</span></span><span class="line"><span class="cl"> <span class="nt">"title"</span><span class="p">:</span> <span class="s2">"Replication Lag"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="nt">"type"</span><span class="p">:</span> <span class="s2">"graph"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="nt">"targets"</span><span class="p">:</span> <span class="p">[{</span>
</span></span><span class="line"><span class="cl"> <span class="nt">"expr"</span><span class="p">:</span> <span class="s2">"geode_replication_lag_seconds"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="nt">"legendFormat"</span><span class="p">:</span> <span class="s2">"{{shard}} -> {{replica}}"</span>
</span></span><span class="line"><span class="cl"> <span class="p">}]</span>
</span></span><span class="line"><span class="cl"> <span class="p">},</span>
</span></span><span class="line"><span class="cl"> <span class="p">{</span>
</span></span><span class="line"><span class="cl"> <span class="nt">"title"</span><span class="p">:</span> <span class="s2">"Failover Events"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="nt">"type"</span><span class="p">:</span> <span class="s2">"graph"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="nt">"targets"</span><span class="p">:</span> <span class="p">[{</span>
</span></span><span class="line"><span class="cl"> <span class="nt">"expr"</span><span class="p">:</span> <span class="s2">"rate(geode_failover_events_total[5m]) * 60"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="nt">"legendFormat"</span><span class="p">:</span> <span class="s2">"Failovers/min"</span>
</span></span><span class="line"><span class="cl"> <span class="p">}]</span>
</span></span><span class="line"><span class="cl"> <span class="p">},</span>
</span></span><span class="line"><span class="cl"> <span class="p">{</span>
</span></span><span class="line"><span class="cl"> <span class="nt">"title"</span><span class="p">:</span> <span class="s2">"Node Roles"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="nt">"type"</span><span class="p">:</span> <span class="s2">"table"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="nt">"targets"</span><span class="p">:</span> <span class="p">[{</span>
</span></span><span class="line"><span class="cl"> <span class="nt">"expr"</span><span class="p">:</span> <span class="s2">"geode_cluster_node_role"</span>
</span></span><span class="line"><span class="cl"> <span class="p">}]</span>
</span></span><span class="line"><span class="cl"> <span class="p">}</span>
</span></span><span class="line"><span class="cl"> <span class="p">]</span>
</span></span><span class="line"><span class="cl"> <span class="p">}</span>
</span></span><span class="line"><span class="cl"><span class="p">}</span>
</span></span></code></pre></div>
<h3 id="testing-high-availability" class="position-relative d-flex align-items-center group">
<span>Testing High Availability</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="testing-high-availability"
aria-haspopup="dialog"
aria-label="Share link: Testing High Availability">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3>
<h4 id="chaos-engineering" class="position-relative d-flex align-items-center group">
<span>Chaos Engineering</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="chaos-engineering"
aria-haspopup="dialog"
aria-label="Share link: Chaos Engineering">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Verify HA behavior by intentionally causing failures:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-bash" data-lang="bash"><span class="line"><span class="cl"><span class="c1"># Kill a node and verify automatic failover</span>
</span></span><span class="line"><span class="cl">docker stop geode-node2
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c1"># Verify cluster continues operating</span>
</span></span><span class="line"><span class="cl">curl http://node1:3141/health/cluster
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c1"># Verify queries still work</span>
</span></span><span class="line"><span class="cl">./geode shell --host node1:3141 -c <span class="s2">"MATCH (n) RETURN count(n)"</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c1"># Restart node and verify rejoin</span>
</span></span><span class="line"><span class="cl">docker start geode-node2
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c1"># Verify node rejoined and synced</span>
</span></span><span class="line"><span class="cl">curl http://node1:3141/health/cluster
</span></span></code></pre></div><p><strong>Automated HA Test Script</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl"><span class="kn">import</span> <span class="nn">asyncio</span>
</span></span><span class="line"><span class="cl"><span class="kn">import</span> <span class="nn">subprocess</span>
</span></span><span class="line"><span class="cl"><span class="kn">from</span> <span class="nn">geode_client</span> <span class="kn">import</span> <span class="n">Client</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="k">async</span> <span class="k">def</span> <span class="nf">test_failover</span><span class="p">():</span>
</span></span><span class="line"><span class="cl"> <span class="n">client</span> <span class="o">=</span> <span class="n">Client</span><span class="p">(</span><span class="n">hosts</span><span class="o">=</span><span class="p">[</span><span class="s2">"node1:3141"</span><span class="p">,</span> <span class="s2">"node2:3141"</span><span class="p">,</span> <span class="s2">"node3:3141"</span><span class="p">])</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="c1"># Verify initial state</span>
</span></span><span class="line"><span class="cl"> <span class="k">async</span> <span class="k">with</span> <span class="n">client</span><span class="o">.</span><span class="n">connection</span><span class="p">()</span> <span class="k">as</span> <span class="n">conn</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="n">result</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="k">await</span> <span class="n">conn</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="s2">"MATCH (n) RETURN count(n) as cnt"</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="n">initial_count</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">rows</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="s1">'cnt'</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Initial node count: </span><span class="si">{</span><span class="n">initial_count</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="c1"># Kill a node</span>
</span></span><span class="line"><span class="cl"> <span class="nb">print</span><span class="p">(</span><span class="s2">"Stopping node2..."</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="n">subprocess</span><span class="o">.</span><span class="n">run</span><span class="p">([</span><span class="s2">"docker"</span><span class="p">,</span> <span class="s2">"stop"</span><span class="p">,</span> <span class="s2">"geode-node2"</span><span class="p">])</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="c1"># Wait for failover</span>
</span></span><span class="line"><span class="cl"> <span class="k">await</span> <span class="n">asyncio</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="c1"># Verify cluster still works</span>
</span></span><span class="line"><span class="cl"> <span class="k">async</span> <span class="k">with</span> <span class="n">client</span><span class="o">.</span><span class="n">connection</span><span class="p">()</span> <span class="k">as</span> <span class="n">conn</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="n">result</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="k">await</span> <span class="n">conn</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="s2">"MATCH (n) RETURN count(n) as cnt"</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="k">assert</span> <span class="n">result</span><span class="o">.</span><span class="n">rows</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="s1">'cnt'</span><span class="p">]</span> <span class="o">==</span> <span class="n">initial_count</span>
</span></span><span class="line"><span class="cl"> <span class="nb">print</span><span class="p">(</span><span class="s2">"Cluster operational after failover"</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="c1"># Restart node</span>
</span></span><span class="line"><span class="cl"> <span class="nb">print</span><span class="p">(</span><span class="s2">"Restarting node2..."</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="n">subprocess</span><span class="o">.</span><span class="n">run</span><span class="p">([</span><span class="s2">"docker"</span><span class="p">,</span> <span class="s2">"start"</span><span class="p">,</span> <span class="s2">"geode-node2"</span><span class="p">])</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="c1"># Wait for rejoin</span>
</span></span><span class="line"><span class="cl"> <span class="k">await</span> <span class="n">asyncio</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="c1"># Verify full recovery</span>
</span></span><span class="line"><span class="cl"> <span class="k">async</span> <span class="k">with</span> <span class="n">client</span><span class="o">.</span><span class="n">connection</span><span class="p">()</span> <span class="k">as</span> <span class="n">conn</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="n">result</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="k">await</span> <span class="n">conn</span><span class="o">.</span><span class="n">query</span><span class="p">(</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"SELECT * FROM system.cluster_nodes WHERE status = 'healthy'"</span>
</span></span><span class="line"><span class="cl"> <span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">rows</span><span class="p">)</span> <span class="o">==</span> <span class="mi">3</span>
</span></span><span class="line"><span class="cl"> <span class="nb">print</span><span class="p">(</span><span class="s2">"Full cluster recovered"</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="n">asyncio</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">test_failover</span><span class="p">())</span>
</span></span></code></pre></div>
<h4 id="disaster-recovery-drills" class="position-relative d-flex align-items-center group">
<span>Disaster Recovery Drills</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="disaster-recovery-drills"
aria-haspopup="dialog"
aria-label="Share link: Disaster Recovery Drills">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Regularly test full recovery procedures:</p>
<ol>
<li><strong>Simulate complete cluster failure</strong></li>
<li><strong>Restore from backup</strong></li>
<li><strong>Verify data integrity</strong></li>
<li><strong>Measure RTO and RPO</strong></li>
<li><strong>Document and improve procedures</strong></li>
</ol>
<h3 id="best-practices" class="position-relative d-flex align-items-center group">
<span>Best Practices</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="best-practices"
aria-haspopup="dialog"
aria-label="Share link: Best Practices">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3>
<h4 id="deployment" class="position-relative d-flex align-items-center group">
<span>Deployment</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="deployment"
aria-haspopup="dialog"
aria-label="Share link: Deployment">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><ol>
<li><strong>Use an odd number of nodes</strong>: 3, 5, or 7 for a clean majority</li>
<li><strong>Spread across failure domains</strong>: Different racks, AZs, or regions</li>
<li><strong>Size for N+1 capacity</strong>: With N nodes, each node should be able to handle total load / (N - 1), so the cluster can absorb the loss of one node</li>
<li><strong>Use dedicated networks</strong>: Separate client and replication traffic</li>
</ol>
<h4 id="configuration" class="position-relative d-flex align-items-center group">
<span>Configuration</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="configuration"
aria-haspopup="dialog"
aria-label="Share link: Configuration">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><ol>
<li><strong>Enable synchronous replication</strong>: For zero RPO</li>
<li><strong>Configure appropriate timeouts</strong>: Balance detection speed vs false positives</li>
<li><strong>Set conservative health thresholds</strong>: Avoid unnecessary failovers</li>
<li><strong>Test failover regularly</strong>: Verify HA actually works</li>
</ol>
<h4 id="operations" class="position-relative d-flex align-items-center group">
<span>Operations</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="operations"
aria-haspopup="dialog"
aria-label="Share link: Operations">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><ol>
<li><strong>Monitor replication lag</strong>: Alert before it becomes critical</li>
<li><strong>Perform rolling upgrades</strong>: One node at a time</li>
<li><strong>Maintain runbooks</strong>: Document recovery procedures</li>
<li><strong>Practice disaster recovery</strong>: Regular drills</li>
</ol>
<h4 id="client-applications" class="position-relative d-flex align-items-center group">
<span>Client Applications</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="client-applications"
aria-haspopup="dialog"
aria-label="Share link: Client Applications">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><ol>
<li><strong>Configure connection pools</strong>: Multiple connections for resilience</li>
<li><strong>Implement retry logic</strong>: Handle transient failures</li>
<li><strong>Use circuit breakers</strong>: Prevent cascade failures</li>
<li><strong>Handle failover gracefully</strong>: Inform users of temporary issues</li>
</ol>
<h3 id="related-topics" class="position-relative d-flex align-items-center group">
<span>Related Topics</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="related-topics"
aria-haspopup="dialog"
aria-label="Share link: Related Topics">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3><ul>
<li><a
href="/tags/distributed-systems/"
>Distributed Systems</a>
- Distributed architecture fundamentals</li>
<li><a
href="/tags/recovery/"
>Recovery</a>
- Backup and disaster recovery</li>
<li><a
href="/tags/clustering/"
>Clustering</a>
- Cluster setup and management</li>
<li><a
href="/tags/deployment/"
>Deployment</a>
- Production deployment patterns</li>
<li><a
href="/tags/monitoring/"
>Monitoring</a>
- Observability and alerting</li>
<li><a
href="/tags/replication/"
>Replication</a>
- Data replication strategies</li>
</ul>
<h3 id="further-reading" class="position-relative d-flex align-items-center group">
<span>Further Reading</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="further-reading"
aria-haspopup="dialog"
aria-label="Share link: Further Reading">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3><ul>
<li>High Availability Architecture Guide</li>
<li>Failover Testing Procedures</li>
<li>Disaster Recovery Planning</li>
<li>SLA Management Guide</li>
<li>Chaos Engineering Handbook</li>
<li>Production Operations Checklist</li>
</ul>
Related Articles
Disaster Recovery
Complete disaster recovery guide for Geode including DR planning, RTO/RPO objectives, failover procedures, and business continuity
Backup and Restore Guide
Comprehensive guide to backing up and restoring Geode databases
High Availability Guide
Configure Geode for high availability with replication, failover, and disaster recovery