<!-- CANARY: REQ=REQ-DOCS-001; FEATURE="Docs"; ASPECT=Documentation; STATUS=TESTED; OWNER=docs; UPDATED=2026-01-15 -->
<h2 id="data-quality-management" class="position-relative d-flex align-items-center group">
  <span>Data Quality Management</span>
  <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="data-quality-management" aria-haspopup="dialog" aria-label="Share link: Data Quality Management">
    <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
    <span class="visually-hidden">Share link</span>
  </button>
</h2>
<!--
  Shared "Share this section" dialog. One instance serves every .h-share
  button on the page: the inline script further down fills the permalink
  input and the three share links from the clicked button's
  data-share-target attribute, then removes the `hidden` attribute.
-->
<div id="headingShareModal" class="heading-share-modal" role="dialog" aria-modal="true" aria-labelledby="headingShareTitle" hidden>
  <div class="hsm-dialog" role="document">
    <div class="hsm-header">
      <h2 id="headingShareTitle" class="h6 mb-0 fw-bold">Share this section</h2>
      <button type="button" class="hsm-close" aria-label="Close">
        <!-- Decorative glyph: the button is named by aria-label. -->
        <i class="fa-solid fa-xmark" aria-hidden="true"></i>
      </button>
    </div>
    <div class="hsm-body">
      <label for="headingShareInput" class="form-label small text-muted mb-1 text-uppercase fw-bold" style="font-size: 0.7rem; letter-spacing: 0.5px;">Permalink</label>
      <div class="input-group mb-4 hsm-url-group">
        <!-- Native `readonly` already exposes the read-only state; no aria-readonly needed. -->
        <input id="headingShareInput" type="text" class="form-control font-monospace" readonly style="font-size: 0.85rem;">
        <button class="btn btn-primary hsm-copy" type="button" aria-label="Copy" title="Copy">
          <i class="fa-duotone fa-clipboard" aria-hidden="true"></i>
        </button>
      </div>
      <div class="small fw-bold mb-2 text-muted text-uppercase" style="font-size: 0.7rem; letter-spacing: 0.5px;">Share via</div>
      <!-- The href of each link is set by the script when the dialog opens. -->
      <div class="hsm-share-grid">
        <a id="share-twitter" class="btn btn-outline-secondary w-100" target="_blank" rel="noopener noreferrer">
          <i class="fa-brands fa-twitter me-2" aria-hidden="true"></i>Twitter<span class="visually-hidden"> (opens in new tab)</span>
        </a>
        <a id="share-linkedin" class="btn btn-outline-secondary w-100" target="_blank" rel="noopener noreferrer">
          <i class="fa-brands fa-linkedin me-2" aria-hidden="true"></i>LinkedIn<span class="visually-hidden"> (opens in new tab)</span>
        </a>
        <a id="share-facebook" class="btn btn-outline-secondary w-100" target="_blank" rel="noopener noreferrer">
          <i class="fa-brands fa-facebook me-2" aria-hidden="true"></i>Facebook<span class="visually-hidden"> (opens in new tab)</span>
        </a>
      </div>
    </div>
  </div>
</div>
<style>
  /* Full-viewport backdrop that centres the dialog. */
  .heading-share-modal {
    position: fixed;
    inset: 0;
    display: flex;
    justify-content: center;
    align-items: center;
    background: rgba(0, 0, 0, 0.6);
    z-index: 1050;
    padding: 1rem;
    -webkit-backdrop-filter: blur(4px);
    backdrop-filter: blur(4px);
  }
  /* `display: flex` above would otherwise override the hidden attribute. */
  .heading-share-modal[hidden] { display: none !important; }
  .hsm-dialog {
    max-width: 420px;
    width: 100%;
    background: var(--bs-body-bg, #fff);
    color: var(--bs-body-color, #212529);
    border: 1px solid var(--bs-border-color, rgba(0,0,0,0.1));
    border-radius: 1rem;
    box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.25);
    overflow: hidden;
    animation: hsm-fade-in 0.2s ease-out;
  }
  @keyframes hsm-fade-in {
    from { opacity: 0; transform: scale(0.95); }
    to { opacity: 1; transform: scale(1); }
  }
  /* Skip the entrance animation for users who ask for reduced motion. */
  @media (prefers-reduced-motion: reduce) {
    .hsm-dialog { animation: none; }
  }
  [data-bs-theme="dark"] .hsm-dialog {
    background: #1e293b;
    border-color: rgba(255,255,255,0.1);
    color: #f8f9fa;
  }
  .hsm-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    padding: 1rem 1.5rem;
    border-bottom: 1px solid var(--bs-border-color, rgba(0,0,0,0.1));
    background: rgba(0,0,0,0.02);
  }
  [data-bs-theme="dark"] .hsm-header {
    background: rgba(255,255,255,0.02);
    border-color: rgba(255,255,255,0.1);
  }
  .hsm-close {
    background: transparent;
    border: none;
    color: inherit;
    opacity: 0.5;
    padding: 0.25rem 0.5rem;
    border-radius: 0.25rem;
    font-size: 1.2rem;
    line-height: 1;
    transition: opacity 0.2s;
  }
  /* Keyboard focus gets the same full-opacity affordance as hover. */
  .hsm-close:hover,
  .hsm-close:focus-visible { opacity: 1; }
  .hsm-body { padding: 1.5rem; }
  .hsm-url-group { display: flex !important; align-items: stretch; }
  .hsm-url-group .form-control {
    flex: 1;
    min-width: 0;
    margin: 0;
    background: var(--bs-secondary-bg, #f8f9fa);
    border-color: var(--bs-border-color, #dee2e6);
    border-top-right-radius: 0;
    border-bottom-right-radius: 0;
    height: 42px;
  }
  .hsm-url-group .btn { flex: 0
0 auto; margin: 0; margin-left: -1px; border-top-left-radius: 0; border-bottom-left-radius: 0; height: 42px; display: flex; align-items: center; justify-content: center; padding: 0 1.25rem; z-index: 2; }
  [data-bs-theme="dark"] .hsm-url-group .form-control { background: #0f172a; border-color: #334155; color: #e2e8f0; }
  .hsm-share-grid { display: flex; flex-direction: column; gap: 0.5rem; }
  .hsm-share-grid .btn { display: flex; align-items: center; justify-content: center; font-size: 0.9rem; padding: 0.6rem; border-color: var(--bs-border-color); width: 100%; }
  [data-bs-theme="dark"] .hsm-share-grid .btn { color: #e2e8f0; border-color: #475569; }
  [data-bs-theme="dark"] .hsm-share-grid .btn:hover { background: #334155; border-color: #cbd5e1; }
</style>
<script>
/*
 * Heading share dialog controller.
 *
 * Wires every `.h-share` button to the single #headingShareModal dialog:
 * clicking a button builds a permalink from the button's data-share-target
 * attribute, writes it into the read-only input and the Twitter / LinkedIn /
 * Facebook links, and reveals the dialog. The dialog closes on Escape, on a
 * backdrop click, or via the close button, and Tab focus wraps inside it
 * while it is open.
 */
(function () {
  const modal = document.getElementById('headingShareModal');
  if (!modal) return;

  const input = modal.querySelector('#headingShareInput');
  // Every path below reads or focuses the input; without it there is nothing to share.
  if (!input) return;

  const copyBtn = modal.querySelector('.hsm-copy');
  const twitter = modal.querySelector('#share-twitter');
  const linkedin = modal.querySelector('#share-linkedin');
  const facebook = modal.querySelector('#share-facebook');

  let lastFocus = null;      // element that regains focus when the dialog closes
  let trapBound = false;     // ensures the Tab-wrapping listener is attached once
  let feedbackTimer = null;  // pending "restore copy icon" timeout

  /** Returns origin + pathname + '#' + id; the query string is not included. */
  function buildUrl(id) {
    return window.location.origin + window.location.pathname + '#' + id;
  }

  /** True while the dialog does not carry the `hidden` attribute. */
  function isOpen() {
    return !modal.hasAttribute('hidden');
  }

  /** Fills the permalink input and the three share links for heading `id`. */
  function hydrate(id) {
    const url = buildUrl(id);
    input.value = url;
    const enc = encodeURIComponent(url);
    const text = encodeURIComponent(document.title);
    if (twitter) twitter.href = `https://twitter.com/intent/tweet?url=${enc}&text=${text}`;
    if (linkedin) linkedin.href = `https://www.linkedin.com/sharing/share-offsite/?url=${enc}`;
    if (facebook) facebook.href = `https://www.facebook.com/sharer/sharer.php?u=${enc}`;
  }

  /**
   * Shows the dialog for heading `id`.
   * `trigger` (optional) is the element that opened it. It is preferred over
   * document.activeElement as the focus-restore target because some browsers
   * do not move focus to a button when it is clicked with a pointer.
   */
  function openModal(id, trigger) {
    if (!isOpen()) {
      // Only record the return target on a real open, so re-hydrating an
      // already open dialog cannot replace it with an element inside the dialog.
      lastFocus = trigger || document.activeElement;
      modal.removeAttribute('hidden');
    }
    hydrate(id);
    requestAnimationFrame(() => { input.focus(); });
    trapFocus();
  }

  /** Hides the dialog and returns focus to where it was before opening. */
  function closeModal() {
    if (!isOpen()) return;
    modal.setAttribute('hidden', '');
    if (lastFocus && typeof lastFocus.focus === 'function') lastFocus.focus();
  }

  /** Copies the permalink, using execCommand when the async Clipboard API is unavailable or rejects. */
  function copyCurrent() {
    try {
      navigator.clipboard.writeText(input.value).then(() => feedback(true), () => fallback());
    } catch (e) {
      // navigator.clipboard is undefined outside secure contexts.
      fallback();
    }
  }

  /** Legacy copy path: select the input and run the 'copy' command. */
  function fallback() {
    input.select();
    let ok = false;
    try {
      // execCommand signals an unsupported or disabled command by returning
      // false, not by throwing, so the return value decides the feedback.
      ok = document.execCommand('copy') === true;
    } catch (e) {
      ok = false;
    }
    feedback(ok);
  }

  /** Swaps the copy button icon to a success/failure glyph for 1.8 s. */
  function feedback(ok) {
    if (!copyBtn) return;
    const icon = copyBtn.querySelector('i');
    if (!icon) return;
    // The original icon classes are cached on the button so repeated clicks
    // during the feedback window never capture the temporary glyph as "previous".
    const prev = copyBtn.getAttribute('data-prev') || icon.className;
    if (!copyBtn.getAttribute('data-prev')) copyBtn.setAttribute('data-prev', prev);
    icon.className = ok ? 'fa-duotone fa-clipboard-check' : 'fa-duotone fa-circle-exclamation';
    // Restart the timer so an earlier click cannot cut this feedback short.
    clearTimeout(feedbackTimer);
    feedbackTimer = setTimeout(() => { icon.className = prev; }, 1800);
  }

  /**
   * Click handler shared by the per-button listeners and the document-level
   * fallback. The button is resolved from the event target because on the
   * delegated path `currentTarget` is the document, which has no getAttribute.
   */
  function handleShareClick(e) {
    const fromTarget = e.target && e.target.closest && e.target.closest('.h-share');
    const btn = fromTarget || e.currentTarget;
    if (!btn || typeof btn.getAttribute !== 'function') return;
    e.preventDefault();
    const id = btn.getAttribute('data-share-target');
    if (id) openModal(id, btn);
  }

  /** Attaches the click handler to each .h-share button exactly once. */
  function bindShareButtons() {
    document.querySelectorAll('.h-share').forEach(btn => {
      if (!btn.dataset.hShareBound) {
        btn.addEventListener('click', handleShareClick);
        btn.dataset.hShareBound = '1';
      }
    });
  }

  // Bind what is already parsed, then again once the rest of the document is available.
  bindShareButtons();
  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', bindShareButtons);
  } else {
    requestAnimationFrame(bindShareButtons);
  }

  // Capture-phase fallback for share buttons that were never bound directly.
  document.addEventListener('click', function (e) {
    const shareBtn = e.target.closest && e.target.closest('.h-share');
    if (shareBtn && !shareBtn.dataset.hShareBound) {
      handleShareClick(e);
    }
  }, true);

  // Delegated dialog controls: backdrop click, close button, copy button.
  document.addEventListener('click', e => {
    if (e.target === modal) closeModal();
    if (e.target.closest && e.target.closest('.hsm-close')) {
      e.preventDefault();
      closeModal();
    }
    if (copyBtn && (e.target === copyBtn || (e.target.closest && e.target.closest('.hsm-copy')))) {
      e.preventDefault();
      copyCurrent();
    }
  });

  document.addEventListener('keydown', e => {
    if (e.key === 'Escape' && isOpen()) closeModal();
  });

  /** Installs (once) a keydown listener that wraps Tab / Shift+Tab inside the dialog. */
  function trapFocus() {
    if (trapBound) return;
    trapBound = true;
    modal.addEventListener('keydown', f => {
      if (f.key !== 'Tab' || !isOpen()) return;
      const focusable = [...modal.querySelectorAll('a[href],button,input,textarea,select,[tabindex]:not([tabindex="-1"])')]
        .filter(el => !el.hasAttribute('disabled'));
      if (!focusable.length) return;
      const first = focusable[0];
      const last = focusable[focusable.length - 1];
      if (f.shiftKey && document.activeElement === first) {
        f.preventDefault();
        last.focus();
      } else if (!f.shiftKey && document.activeElement === last) {
        f.preventDefault();
        first.focus();
      }
    });
  }
})();
</script>
<p>Data quality management in Geode encompasses the processes, techniques, and tools that ensure your graph database contains accurate, complete, consistent, and timely data. High-quality data is essential for reliable analytics, trustworthy decision-making, and operational efficiency. Poor data quality leads to incorrect insights, failed transactions, and loss of user trust.</p>
<h3 id="the-six-dimensions-of-data-quality" class="position-relative d-flex align-items-center group">
  <span>The Six Dimensions of Data Quality</span>
  <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="the-six-dimensions-of-data-quality" aria-haspopup="dialog" aria-label="Share link: The Six Dimensions of Data Quality">
    <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
    <span class="visually-hidden">Share link</span>
  </button>
</h3>
<p>Understanding these six fundamental dimensions helps frame your quality management strategy:</p>
<h4 id="1-accuracy" class="position-relative d-flex align-items-center group">
  <span>1. Accuracy</span>
  <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="1-accuracy" aria-haspopup="dialog" aria-label="Share link: 1.
Accuracy"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><p>Accuracy measures whether data correctly represents the real-world entities and relationships it describes. Inaccurate data leads to wrong conclusions and faulty business decisions.</p> <p><strong>Examples</strong>:</p> <ul> <li>Person&rsquo;s age matches their birth date</li> <li>Product prices reflect current market rates</li> <li>Addresses correspond to real locations</li> </ul> <p><strong>Validation</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Verify</span><span class="w"> </span><span class="py">age</span><span class="w"> </span><span class="py">matches</span><span class="w"> </span><span class="py">birth</span><span class="w"> </span><span class="py">date</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="w"> </span><span class="py">IS</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">date_of_birth</span><span class="w"> </span><span class="py">IS</span><span class="w"> 
</span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="w"> </span><span class="p">!=</span><span class="w"> </span><span class="py">EXTRACT</span><span class="p">(</span><span class="py">YEAR</span><span class="w"> </span><span class="py">FROM</span><span class="w"> </span><span class="py">AGE</span><span class="p">(</span><span class="py">CURRENT_DATE</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">date_of_birth</span><span class="p">))</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">id</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">date_of_birth</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">age</span><span class="w"> </span><span class="py">mismatch</span><span class="err">&#39;</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">issue</span><span class="err">;</span><span class="w"> </span></span></span></code></pre></div> <h4 id="2-completeness" class="position-relative d-flex align-items-center group"> <span>2. Completeness</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="2-completeness" aria-haspopup="dialog" aria-label="Share link: 2. 
Completeness"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><p>Completeness measures whether all required data is present. Missing critical data prevents proper graph traversals and analytics.</p> <p><strong>Examples</strong>:</p> <ul> <li>All customers have email addresses</li> <li>Products have prices and SKUs</li> <li>Employees have hire dates</li> </ul> <p><strong>Validation</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Find</span><span class="w"> </span><span class="py">incomplete</span><span class="w"> </span><span class="py">records</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="w"> </span><span class="py">IS</span><span class="w"> </span><span class="py">NULL</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">OR</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">name</span><span class="w"> </span><span class="py">IS</span><span class="w"> </span><span class="py">NULL</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">OR</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span 
class="py">created_at</span><span class="w"> </span><span class="py">IS</span><span class="w"> </span><span class="py">NULL</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">id</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">missing</span><span class="w"> </span><span class="py">required</span><span class="w"> </span><span class="py">fields</span><span class="err">&#39;</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">issue</span><span class="err">;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Completeness</span><span class="w"> </span><span class="py">percentage</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">COUNT</span><span class="p">(</span><span class="err">*</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">total</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">COUNT</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span 
class="py">phone</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">with_phone</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">ROUND</span><span class="p">(</span><span class="py">COUNT</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">phone</span><span class="p">)</span><span class="w"> </span><span class="err">*</span><span class="w"> </span><span class="py">100</span><span class="mf">.0</span><span class="w"> </span><span class="err">/</span><span class="w"> </span><span class="py">COUNT</span><span class="p">(</span><span class="err">*</span><span class="p">),</span><span class="w"> </span><span class="py">2</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">phone_completeness_pct</span><span class="err">;</span><span class="w"> </span></span></span></code></pre></div> <h4 id="3-consistency" class="position-relative d-flex align-items-center group"> <span>3. Consistency</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="3-consistency" aria-haspopup="dialog" aria-label="Share link: 3. Consistency"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><p>Consistency ensures data is uniform across the system and doesn&rsquo;t contradict itself. 
Inconsistent data creates confusion and unreliable queries.</p> <p><strong>Examples</strong>:</p> <ul> <li>Same email format across all records</li> <li>Consistent date formats</li> <li>Matching foreign key references</li> </ul> <p><strong>Validation</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Check</span><span class="w"> </span><span class="py">for</span><span class="w"> </span><span class="py">date</span><span class="w"> </span><span class="py">inconsistencies</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">o</span><span class="p">:</span><span class="nc">Order</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">o</span><span class="err">.</span><span class="py">shipped_date</span><span class="w"> </span><span class="err">&lt;</span><span class="w"> </span><span class="py">o</span><span class="err">.</span><span class="py">created_date</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">OR</span><span class="w"> </span><span class="py">o</span><span class="err">.</span><span class="py">delivered_date</span><span class="w"> </span><span class="err">&lt;</span><span class="w"> </span><span class="py">o</span><span class="err">.</span><span class="py">shipped_date</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">o</span><span class="err">.</span><span class="py">id</span><span class="p">,</span><span class="w"> 
</span><span class="py">o</span><span class="err">.</span><span class="py">created_date</span><span class="p">,</span><span class="w"> </span><span class="py">o</span><span class="err">.</span><span class="py">shipped_date</span><span class="p">,</span><span class="w"> </span><span class="py">o</span><span class="err">.</span><span class="py">delivered_date</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">date</span><span class="w"> </span><span class="py">sequence</span><span class="w"> </span><span class="py">error</span><span class="err">&#39;</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">issue</span><span class="err">;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Find</span><span class="w"> </span><span class="py">orphaned</span><span class="w"> </span><span class="py">relationships</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">e</span><span class="p">)</span><span class="err">-</span><span class="p">[</span><span class="py">r</span><span class="p">:</span><span class="nc">WORKS_FOR</span><span class="p">]</span><span class="err">-&gt;</span><span class="p">(</span><span class="py">c</span><span class="p">:</span><span class="nc">Company</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">EXISTS</span><span class="p">((</span><span class="py">e</span><span class="p">:</span><span class="nc">Employee</span><span 
class="p">))</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">e</span><span class="p">,</span><span class="w"> </span><span class="py">r</span><span class="p">,</span><span class="w"> </span><span class="py">c</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">orphaned</span><span class="w"> </span><span class="py">relationship</span><span class="err">&#39;</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">issue</span><span class="err">;</span><span class="w"> </span></span></span></code></pre></div> <h4 id="4-timeliness" class="position-relative d-flex align-items-center group"> <span>4. Timeliness</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="4-timeliness" aria-haspopup="dialog" aria-label="Share link: 4. Timeliness"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><p>Timeliness measures whether data is up-to-date and available when needed. 
Stale data leads to decisions based on outdated information.</p> <p><strong>Examples</strong>:</p> <ul> <li>Inventory levels updated in real-time</li> <li>User profiles reflect recent changes</li> <li>Analytics dashboards show current data</li> </ul> <p><strong>Validation</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Find</span><span class="w"> </span><span class="py">stale</span><span class="w"> </span><span class="py">records</span><span class="w"> </span><span class="p">(</span><span class="py">not</span><span class="w"> </span><span class="py">updated</span><span class="w"> </span><span class="py">in</span><span class="w"> </span><span class="py">30</span><span class="w"> </span><span class="py">days</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Product</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">updated_at</span><span class="w"> </span><span class="err">&lt;</span><span class="w"> </span><span class="p">(</span><span class="py">NOW</span><span class="p">()</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="py">INTERVAL</span><span class="w"> </span><span class="err">&#39;</span><span class="py">30</span><span class="w"> </span><span class="py">days</span><span class="err">&#39;</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span 
class="py">RETURN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">sku</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">name</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">updated_at</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">stale</span><span class="w"> </span><span class="py">data</span><span class="err">&#39;</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">issue</span><span class="err">;</span><span class="w"> </span></span></span></code></pre></div> <h4 id="5-validity" class="position-relative d-flex align-items-center group"> <span>5. Validity</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="5-validity" aria-haspopup="dialog" aria-label="Share link: 5. Validity"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><p>Validity ensures data conforms to defined formats, ranges, and business rules. 
Invalid data fails to meet schema constraints.</p> <p><strong>Examples</strong>:</p> <ul> <li>Email addresses match email format</li> <li>Ages within reasonable range (0-150)</li> <li>Dates are valid calendar dates</li> </ul> <p><strong>Validation</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Find</span><span class="w"> </span><span class="py">invalid</span><span class="w"> </span><span class="py">formats</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="w"> </span><span class="py">IS</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="w"> </span><span class="err">~</span><span class="w"> </span><span class="err">&#39;^</span><span class="p">[</span><span class="py">a</span><span class="err">-</span><span class="py">zA</span><span class="err">-</span><span class="py">Z0</span><span class="err">-</span><span class="py">9</span><span class="err">.</span><span class="py">_</span><span class="err">%+-</span><span class="p">]</span><span class="err">+@</span><span 
class="p">[</span><span class="py">a</span><span class="err">-</span><span class="py">zA</span><span class="err">-</span><span class="py">Z0</span><span class="err">-</span><span class="py">9</span><span class="err">.-</span><span class="p">]</span><span class="err">+\.</span><span class="p">[</span><span class="py">a</span><span class="err">-</span><span class="py">zA</span><span class="err">-</span><span class="py">Z</span><span class="p">]{</span><span class="py">2</span><span class="p">,}</span><span class="err">$&#39;</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">id</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">invalid</span><span class="w"> </span><span class="py">email</span><span class="w"> </span><span class="py">format</span><span class="err">&#39;</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">issue</span><span class="err">;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Find</span><span class="w"> </span><span class="py">out</span><span class="err">-</span><span class="py">of</span><span class="err">-</span><span class="py">range</span><span class="w"> </span><span class="py">values</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span 
class="nc">Person</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="w"> </span><span class="err">&lt;</span><span class="w"> </span><span class="py">0</span><span class="w"> </span><span class="py">OR</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="w"> </span><span class="err">&gt;</span><span class="w"> </span><span class="py">150</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">id</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">age</span><span class="w"> </span><span class="py">out</span><span class="w"> </span><span class="py">of</span><span class="w"> </span><span class="py">range</span><span class="err">&#39;</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">issue</span><span class="err">;</span><span class="w"> </span></span></span></code></pre></div> <h4 id="6-uniqueness" class="position-relative d-flex align-items-center group"> <span>6. Uniqueness</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="6-uniqueness" aria-haspopup="dialog" aria-label="Share link: 6. 
Uniqueness"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><p>Uniqueness ensures that records which should be unique are not duplicated. Duplicates skew analytics and create confusion.</p> <p><strong>Examples</strong>:</p> <ul> <li>No duplicate email addresses</li> <li>Unique product SKUs</li> <li>Single canonical record per entity</li> </ul> <p><strong>Validation</strong>:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Find</span><span class="w"> </span><span class="py">duplicate</span><span class="w"> </span><span class="py">emails</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">email</span><span class="p">,</span><span class="w"> </span><span class="py">COLLECT</span><span class="p">(</span><span class="py">p</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">persons</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">SIZE</span><span class="p">(</span><span class="py">persons</span><span class="p">)</span><span class="w"> </span><span 
class="err">&gt;</span><span class="w"> </span><span class="py">1</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">email</span><span class="p">,</span><span class="w"> </span><span class="py">SIZE</span><span class="p">(</span><span class="py">persons</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">duplicate_count</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">duplicate</span><span class="w"> </span><span class="py">email</span><span class="err">&#39;</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">issue</span><span class="err">;</span><span class="w"> </span></span></span></code></pre></div> <h3 id="data-profiling" class="position-relative d-flex align-items-center group"> <span>Data Profiling</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="data-profiling" aria-haspopup="dialog" aria-label="Share link: Data Profiling"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3><p>Data profiling analyzes your database to understand patterns, distributions, and anomalies. 
Regular profiling helps identify quality issues before they impact operations.</p> <h4 id="statistical-profiling" class="position-relative d-flex align-items-center group"> <span>Statistical Profiling</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="statistical-profiling" aria-haspopup="dialog" aria-label="Share link: Statistical Profiling"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Comprehensive</span><span class="w"> </span><span class="py">data</span><span class="w"> </span><span class="py">profile</span><span class="w"> </span><span class="py">for</span><span class="w"> </span><span class="py">Person</span><span class="w"> </span><span class="py">nodes</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">COUNT</span><span class="p">(</span><span class="err">*</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">total_persons</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">COUNT</span><span 
class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">emails_present</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">COUNT</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">phone</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">phones_present</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">COUNT</span><span class="p">(</span><span class="py">DISTINCT</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">country</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">distinct_countries</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">MIN</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">min_age</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">MAX</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">max_age</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AVG</span><span 
class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">avg_age</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">PERCENTILE_CONT</span><span class="p">(</span><span class="py">0</span><span class="mf">.5</span><span class="p">)</span><span class="w"> </span><span class="py">WITHIN</span><span class="w"> </span><span class="py">GROUP</span><span class="w"> </span><span class="p">(</span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">median_age</span><span class="err">;</span><span class="w"> </span></span></span></code></pre></div> <h4 id="distribution-analysis" class="position-relative d-flex align-items-center group"> <span>Distribution Analysis</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="distribution-analysis" aria-haspopup="dialog" aria-label="Share link: Distribution Analysis"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl"><span class="c1"># Python - analyze age distribution</span> </span></span><span class="line"><span class="cl"><span class="kn">from</span> <span class="nn">geode_client</span> <span class="kn">import</span> <span class="n">Client</span> </span></span><span class="line"><span 
class="cl"><span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="k">as</span> <span class="nn">plt</span> </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl"><span class="k">async</span> <span class="k">def</span> <span class="nf">profile_age_distribution</span><span class="p">(</span><span class="n">client</span><span class="p">:</span> <span class="n">Client</span><span class="p">):</span> </span></span><span class="line"><span class="cl"> <span class="n">result</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="k">await</span> <span class="n">client</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="s2">&#34;&#34;&#34; </span></span></span><span class="line"><span class="cl"><span class="s2"> MATCH (p:Person) </span></span></span><span class="line"><span class="cl"><span class="s2"> WHERE p.age IS NOT NULL </span></span></span><span class="line"><span class="cl"><span class="s2"> RETURN </span></span></span><span class="line"><span class="cl"><span class="s2"> CASE </span></span></span><span class="line"><span class="cl"><span class="s2"> WHEN p.age &lt; 18 THEN &#39;Under 18&#39; </span></span></span><span class="line"><span class="cl"><span class="s2"> WHEN p.age &lt; 30 THEN &#39;18-29&#39; </span></span></span><span class="line"><span class="cl"><span class="s2"> WHEN p.age &lt; 50 THEN &#39;30-49&#39; </span></span></span><span class="line"><span class="cl"><span class="s2"> WHEN p.age &lt; 65 THEN &#39;50-64&#39; </span></span></span><span class="line"><span class="cl"><span class="s2"> ELSE &#39;65+&#39; </span></span></span><span class="line"><span class="cl"><span class="s2"> END AS age_range, </span></span></span><span class="line"><span class="cl"><span class="s2"> COUNT(*) AS count </span></span></span><span class="line"><span class="cl"><span class="s2"> ORDER BY age_range 
</span></span></span><span class="line"><span class="cl"><span class="s2"> &#34;&#34;&#34;</span><span class="p">)</span> </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl"> <span class="n">age_ranges</span> <span class="o">=</span> <span class="p">[</span><span class="n">row</span><span class="p">[</span><span class="s1">&#39;age_range&#39;</span><span class="p">]</span> <span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">result</span><span class="o">.</span><span class="n">bindings</span><span class="p">]</span> </span></span><span class="line"><span class="cl"> <span class="n">counts</span> <span class="o">=</span> <span class="p">[</span><span class="n">row</span><span class="p">[</span><span class="s1">&#39;count&#39;</span><span class="p">]</span> <span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">result</span><span class="o">.</span><span class="n">bindings</span><span class="p">]</span> </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl"> <span class="c1"># Visualize distribution</span> </span></span><span class="line"><span class="cl"> <span class="n">plt</span><span class="o">.</span><span class="n">bar</span><span class="p">(</span><span class="n">age_ranges</span><span class="p">,</span> <span class="n">counts</span><span class="p">)</span> </span></span><span class="line"><span class="cl"> <span class="n">plt</span><span class="o">.</span><span class="n">title</span><span class="p">(</span><span class="s1">&#39;Age Distribution&#39;</span><span class="p">)</span> </span></span><span class="line"><span class="cl"> <span class="n">plt</span><span class="o">.</span><span class="n">xlabel</span><span class="p">(</span><span class="s1">&#39;Age Range&#39;</span><span class="p">)</span> </span></span><span class="line"><span class="cl"> <span class="n">plt</span><span 
class="o">.</span><span class="n">ylabel</span><span class="p">(</span><span class="s1">&#39;Count&#39;</span><span class="p">)</span> </span></span><span class="line"><span class="cl"> <span class="n">plt</span><span class="o">.</span><span class="n">savefig</span><span class="p">(</span><span class="s1">&#39;age_distribution.png&#39;</span><span class="p">)</span> </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl"> <span class="c1"># Calculate percentages</span> </span></span><span class="line"><span class="cl"> <span class="n">total</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">(</span><span class="n">counts</span><span class="p">)</span> </span></span><span class="line"><span class="cl"> <span class="k">for</span> <span class="n">age_range</span><span class="p">,</span> <span class="n">count</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">age_ranges</span><span class="p">,</span> <span class="n">counts</span><span class="p">):</span> </span></span><span class="line"><span class="cl"> <span class="n">pct</span> <span class="o">=</span> <span class="p">(</span><span class="n">count</span> <span class="o">/</span> <span class="n">total</span><span class="p">)</span> <span class="o">*</span> <span class="mi">100</span> </span></span><span class="line"><span class="cl"> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&#34;</span><span class="si">{</span><span class="n">age_range</span><span class="si">}</span><span class="s2">: </span><span class="si">{</span><span class="n">count</span><span class="si">}</span><span class="s2"> (</span><span class="si">{</span><span class="n">pct</span><span class="si">:</span><span class="s2">.1f</span><span class="si">}</span><span class="s2">%)&#34;</span><span class="p">)</span> </span></span></code></pre></div> <h4 id="outlier-detection" 
class="position-relative d-flex align-items-center group"> <span>Outlier Detection</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="outlier-detection" aria-haspopup="dialog" aria-label="Share link: Outlier Detection"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Find</span><span class="w"> </span><span class="py">statistical</span><span class="w"> </span><span class="py">outliers</span><span class="w"> </span><span class="p">(</span><span class="py">values</span><span class="w"> </span><span class="py">beyond</span><span class="w"> </span><span class="py">3</span><span class="w"> </span><span class="py">standard</span><span class="w"> </span><span class="py">deviations</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="py">stats</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="p">(</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Product</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">RETURN</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AVG</span><span class="p">(</span><span 
class="py">p</span><span class="err">.</span><span class="py">price</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">mean_price</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">STDDEV</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">price</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">stddev_price</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Product</span><span class="p">),</span><span class="w"> </span><span class="p">(</span><span class="py">s</span><span class="p">:</span><span class="nc">stats</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">price</span><span class="w"> </span><span class="err">&lt;</span><span class="w"> </span><span class="p">(</span><span class="py">s</span><span class="err">.</span><span class="py">mean_price</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="py">3</span><span class="w"> </span><span class="err">*</span><span class="w"> </span><span class="py">s</span><span class="err">.</span><span class="py">stddev_price</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">OR</span><span class="w"> </span><span 
class="py">p</span><span class="err">.</span><span class="py">price</span><span class="w"> </span><span class="err">&gt;</span><span class="w"> </span><span class="p">(</span><span class="py">s</span><span class="err">.</span><span class="py">mean_price</span><span class="w"> </span><span class="err">+</span><span class="w"> </span><span class="py">3</span><span class="w"> </span><span class="err">*</span><span class="w"> </span><span class="py">s</span><span class="err">.</span><span class="py">stddev_price</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">sku</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">name</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">price</span><span class="p">,</span><span class="w"> </span><span class="py">s</span><span class="err">.</span><span class="py">mean_price</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">price</span><span class="w"> </span><span class="py">outlier</span><span class="err">&#39;</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">issue</span><span class="err">;</span><span class="w"> </span></span></span></code></pre></div> <h3 id="data-cleansing-strategies" class="position-relative d-flex align-items-center group"> <span>Data Cleansing Strategies</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="data-cleansing-strategies" aria-haspopup="dialog" aria-label="Share link: Data Cleansing Strategies"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" 
aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3><p>Data cleansing removes or corrects invalid, duplicate, or inconsistent data.</p> <h4 id="deduplication" class="position-relative d-flex align-items-center group"> <span>Deduplication</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="deduplication" aria-haspopup="dialog" aria-label="Share link: Deduplication"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Find</span><span class="w"> </span><span class="py">and</span><span class="w"> </span><span class="py">merge</span><span class="w"> </span><span class="py">duplicate</span><span class="w"> </span><span class="py">person</span><span class="w"> </span><span class="py">records</span><span class="w"> </span><span class="py">by</span><span class="w"> </span><span class="py">email</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p1</span><span class="p">:</span><span class="nc">Person</span><span class="p">),</span><span class="w"> </span><span class="p">(</span><span class="py">p2</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p1</span><span class="err">.</span><span class="py">email</span><span class="w"> </span><span 
class="p">=</span><span class="w"> </span><span class="py">p2</span><span class="err">.</span><span class="py">email</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="py">p1</span><span class="err">.</span><span class="py">id</span><span class="w"> </span><span class="err">&lt;</span><span class="w"> </span><span class="py">p2</span><span class="err">.</span><span class="py">id</span><span class="w"> </span><span class="err">--</span><span class="w"> </span><span class="py">Prevent</span><span class="w"> </span><span class="py">self</span><span class="err">-</span><span class="py">matches</span><span class="w"> </span><span class="py">and</span><span class="w"> </span><span class="py">duplicate</span><span class="w"> </span><span class="py">pairs</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="py">p1</span><span class="p">,</span><span class="w"> </span><span class="py">p2</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p2</span><span class="p">)</span><span class="err">-</span><span class="p">[</span><span class="py">r</span><span class="p">]</span><span class="err">-&gt;</span><span class="p">(</span><span class="py">other</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">CREATE</span><span class="w"> </span><span class="p">(</span><span class="py">p1</span><span class="p">)</span><span class="err">-</span><span class="p">[</span><span class="py">r2</span><span class="p">:</span><span class="nc">SAME_TYPE</span><span class="p">(</span><span class="py">r</span><span class="p">)]</span><span 
class="err">-&gt;</span><span class="p">(</span><span class="py">other</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">SET</span><span class="w"> </span><span class="py">r2</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">properties</span><span class="p">(</span><span class="py">r</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">DELETE</span><span class="w"> </span><span class="py">r</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="py">p1</span><span class="p">,</span><span class="w"> </span><span class="py">p2</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">DELETE</span><span class="w"> </span><span class="py">p2</span><span class="err">;</span><span class="w"> </span></span></span></code></pre></div><p>Python script for safe deduplication:</p> <div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl"><span class="c1"># Python - deduplicate with manual review</span> </span></span><span class="line"><span class="cl"><span class="k">async</span> <span class="k">def</span> <span class="nf">find_duplicates</span><span class="p">(</span><span class="n">client</span><span class="p">:</span> <span class="n">Client</span><span class="p">):</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;&#34;&#34;Find potential duplicate persons based on email.&#34;&#34;&#34;</span> </span></span><span class="line"><span class="cl"> <span class="n">result</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span 
class="k">await</span> <span class="n">client</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="s2">&#34;&#34;&#34; </span></span></span><span class="line"><span class="cl"><span class="s2"> MATCH (p:Person) </span></span></span><span class="line"><span class="cl"><span class="s2"> WITH p.email AS email, COLLECT(p) AS persons </span></span></span><span class="line"><span class="cl"><span class="s2"> WHERE SIZE(persons) &gt; 1 </span></span></span><span class="line"><span class="cl"><span class="s2"> RETURN </span></span></span><span class="line"><span class="cl"><span class="s2"> email, </span></span></span><span class="line"><span class="cl"><span class="s2"> [person IN persons | { </span></span></span><span class="line"><span class="cl"><span class="s2"> id: person.id, </span></span></span><span class="line"><span class="cl"><span class="s2"> name: person.name, </span></span></span><span class="line"><span class="cl"><span class="s2"> created_at: person.created_at </span></span></span><span class="line"><span class="cl"><span class="s2"> }] AS duplicates </span></span></span><span class="line"><span class="cl"><span class="s2"> &#34;&#34;&#34;</span><span class="p">)</span> </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl"> <span class="k">return</span> <span class="n">result</span><span class="o">.</span><span class="n">bindings</span> </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl"><span class="k">async</span> <span class="k">def</span> <span class="nf">merge_persons</span><span class="p">(</span><span class="n">client</span><span class="p">:</span> <span class="n">Client</span><span class="p">,</span> <span class="n">keep_id</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">merge_id</span><span class="p">:</span> <span 
class="nb">str</span><span class="p">):</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;&#34;&#34;Merge two person records, keeping the first.&#34;&#34;&#34;</span> </span></span><span class="line"><span class="cl"> <span class="k">async</span> <span class="k">with</span> <span class="n">client</span><span class="o">.</span><span class="n">connection</span><span class="p">()</span> <span class="k">as</span> <span class="n">tx</span><span class="p">:</span> </span></span><span class="line"><span class="cl"> <span class="k">await</span> <span class="n">tx</span><span class="o">.</span><span class="n">begin</span><span class="p">()</span> </span></span><span class="line"><span class="cl"> <span class="c1"># Transfer relationships</span> </span></span><span class="line"><span class="cl"> <span class="k">await</span> <span class="n">tx</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="s2">&#34;&#34;&#34; </span></span></span><span class="line"><span class="cl"><span class="s2"> MATCH (keep:Person {id: $keep_id}) </span></span></span><span class="line"><span class="cl"><span class="s2"> MATCH (merge:Person {id: $merge_id}) </span></span></span><span class="line"><span class="cl"><span class="s2"> MATCH (merge)-[r]-&gt;(other) </span></span></span><span class="line"><span class="cl"><span class="s2"> WHERE NOT EXISTS((keep)-[:SAME_TYPE(r)]-&gt;(other)) </span></span></span><span class="line"><span class="cl"><span class="s2"> CREATE (keep)-[r2:SAME_TYPE(r)]-&gt;(other) </span></span></span><span class="line"><span class="cl"><span class="s2"> SET r2 = properties(r) </span></span></span><span class="line"><span class="cl"><span class="s2"> DELETE r </span></span></span><span class="line"><span class="cl"><span class="s2"> &#34;&#34;&#34;</span><span class="p">,</span> <span class="p">{</span><span class="s2">&#34;keep_id&#34;</span><span class="p">:</span> <span class="n">keep_id</span><span 
class="p">,</span> <span class="s2">&#34;merge_id&#34;</span><span class="p">:</span> <span class="n">merge_id</span><span class="p">})</span> </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl"> <span class="c1"># Delete duplicate</span> </span></span><span class="line"><span class="cl"> <span class="k">await</span> <span class="n">tx</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;MATCH (p:Person {id: $merge_id}) DELETE p&#34;</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="p">{</span><span class="s2">&#34;merge_id&#34;</span><span class="p">:</span> <span class="n">merge_id</span><span class="p">}</span> </span></span><span class="line"><span class="cl"> <span class="p">)</span> </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl"> <span class="k">await</span> <span class="n">tx</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span> </span></span></code></pre></div> <h4 id="data-standardization" class="position-relative d-flex align-items-center group"> <span>Data Standardization</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="data-standardization" aria-haspopup="dialog" aria-label="Share link: Data Standardization"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Standardize</span><span class="w"> </span><span class="py">email</span><span class="w"> 
</span><span class="py">addresses</span><span class="w"> </span><span class="p">(</span><span class="py">lowercase</span><span class="p">,</span><span class="w"> </span><span class="py">trimmed</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="w"> </span><span class="py">IS</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">SET</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">LOWER</span><span class="p">(</span><span class="py">TRIM</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="p">))</span><span class="err">;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Standardize</span><span class="w"> </span><span class="py">phone</span><span class="w"> </span><span class="py">numbers</span><span class="w"> </span><span class="p">(</span><span class="py">remove</span><span class="w"> </span><span class="py">formatting</span><span class="p">)</span><span class="w"> </span></span></span><span 
class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">phone</span><span class="w"> </span><span class="py">IS</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">SET</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">phone</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">REGEXP_REPLACE</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">phone</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="p">[</span><span class="err">^</span><span class="py">0</span><span class="err">-</span><span class="py">9</span><span class="err">+</span><span class="p">]</span><span class="err">&#39;</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;&#39;</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">g</span><span class="err">&#39;</span><span class="p">)</span><span class="err">;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Standardize</span><span class="w"> </span><span class="py">country</span><span class="w"> </span><span class="py">codes</span><span class="w"> 
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">country</span><span class="w"> </span><span class="py">IS</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="py">LENGTH</span><span class="p">(</span><span class="py">TRIM</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">country</span><span class="p">))</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">2</span><span class="w"> </span><span class="err">--</span><span class="w"> </span><span class="py">ISO</span><span class="w"> </span><span class="py">3166</span><span class="err">-</span><span class="py">1</span><span class="w"> </span><span class="py">alpha</span><span class="err">-</span><span class="py">2</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">SET</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">country</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">UPPER</span><span class="p">(</span><span class="py">TRIM</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">country</span><span class="p">))</span><span class="err">;</span><span class="w"> </span></span></span></code></pre></div> <h4 id="fixing-null-vs-empty-string"
class="position-relative d-flex align-items-center group"> <span>Fixing NULL vs Empty String</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="fixing-null-vs-empty-string" aria-haspopup="dialog" aria-label="Share link: Fixing NULL vs Empty String"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Convert</span><span class="w"> </span><span class="py">empty</span><span class="w"> </span><span class="py">strings</span><span class="w"> </span><span class="py">to</span><span class="w"> </span><span class="py">NULL</span><span class="w"> </span><span class="py">for</span><span class="w"> </span><span class="py">consistency</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">middle_name</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="err">&#39;&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">OR</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">phone</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span 
class="err">&#39;&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">OR</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">bio</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="err">&#39;&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">SET</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">middle_name</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">CASE</span><span class="w"> </span><span class="py">WHEN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">middle_name</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="err">&#39;&#39;</span><span class="w"> </span><span class="py">THEN</span><span class="w"> </span><span class="py">NULL</span><span class="w"> </span><span class="py">ELSE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">middle_name</span><span class="w"> </span><span class="py">END</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">phone</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">CASE</span><span class="w"> </span><span class="py">WHEN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">phone</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="err">&#39;&#39;</span><span class="w"> </span><span class="py">THEN</span><span class="w"> </span><span class="py">NULL</span><span class="w"> </span><span 
class="py">ELSE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">phone</span><span class="w"> </span><span class="py">END</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">bio</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">CASE</span><span class="w"> </span><span class="py">WHEN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">bio</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="err">&#39;&#39;</span><span class="w"> </span><span class="py">THEN</span><span class="w"> </span><span class="py">NULL</span><span class="w"> </span><span class="py">ELSE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">bio</span><span class="w"> </span><span class="py">END</span><span class="err">;</span><span class="w"> </span></span></span></code></pre></div> <h3 id="quality-monitoring-and-metrics" class="position-relative d-flex align-items-center group"> <span>Quality Monitoring and Metrics</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="quality-monitoring-and-metrics" aria-haspopup="dialog" aria-label="Share link: Quality Monitoring and Metrics"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3><p>Continuous monitoring ensures data quality doesn&rsquo;t degrade over time.</p> <h4 id="quality-dashboard" class="position-relative d-flex align-items-center group"> <span>Quality Dashboard</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary 
opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="quality-dashboard" aria-haspopup="dialog" aria-label="Share link: Quality Dashboard"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Comprehensive</span><span class="w"> </span><span class="py">data</span><span class="w"> </span><span class="py">quality</span><span class="w"> </span><span class="py">metrics</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="py">person_metrics</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="p">(</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">RETURN</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">COUNT</span><span class="p">(</span><span class="err">*</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">total</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">COUNT</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="p">)</span><span class="w"> 
</span><span class="py">AS</span><span class="w"> </span><span class="py">has_email</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">COUNT</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">phone</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">has_phone</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">SUM</span><span class="p">(</span><span class="py">CASE</span><span class="w"> </span><span class="py">WHEN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="w"> </span><span class="err">&lt;</span><span class="w"> </span><span class="py">0</span><span class="w"> </span><span class="py">OR</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="w"> </span><span class="err">&gt;</span><span class="w"> </span><span class="py">150</span><span class="w"> </span><span class="py">THEN</span><span class="w"> </span><span class="py">1</span><span class="w"> </span><span class="py">ELSE</span><span class="w"> </span><span class="py">0</span><span class="w"> </span><span class="py">END</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">invalid_age</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">SUM</span><span class="p">(</span><span class="py">CASE</span><span class="w"> </span><span class="py">WHEN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="w"> </span><span class="p">!</span><span 
class="err">~</span><span class="w"> </span><span class="err">&#39;^</span><span class="p">[</span><span class="err">^@</span><span class="p">]</span><span class="err">+@</span><span class="p">[</span><span class="err">^@</span><span class="p">]</span><span class="err">+\.</span><span class="p">[</span><span class="err">^@</span><span class="p">]</span><span class="err">+$&#39;</span><span class="w"> </span><span class="py">THEN</span><span class="w"> </span><span class="py">1</span><span class="w"> </span><span class="py">ELSE</span><span class="w"> </span><span class="py">0</span><span class="w"> </span><span class="py">END</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">invalid_email</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">SELECT</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="err">&#39;</span><span class="py">Completeness</span><span class="p">:</span><span class="w"> </span><span class="nc">Email</span><span class="err">&#39;</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">metric</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">ROUND</span><span class="p">(</span><span class="py">100</span><span class="mf">.0</span><span class="w"> </span><span class="err">*</span><span class="w"> </span><span class="py">has_email</span><span class="w"> </span><span class="err">/</span><span class="w"> </span><span class="py">total</span><span class="p">,</span><span class="w"> </span><span class="py">2</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> 
</span><span class="py">score</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">FROM</span><span class="w"> </span><span class="py">person_metrics</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="kc">UNION</span><span class="w"> </span><span class="py">ALL</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">SELECT</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="err">&#39;</span><span class="py">Completeness</span><span class="p">:</span><span class="w"> </span><span class="nc">Phone</span><span class="err">&#39;</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">ROUND</span><span class="p">(</span><span class="py">100</span><span class="mf">.0</span><span class="w"> </span><span class="err">*</span><span class="w"> </span><span class="py">has_phone</span><span class="w"> </span><span class="err">/</span><span class="w"> </span><span class="py">total</span><span class="p">,</span><span class="w"> </span><span class="py">2</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">FROM</span><span class="w"> </span><span class="py">person_metrics</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="kc">UNION</span><span class="w"> </span><span class="py">ALL</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">SELECT</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="err">&#39;</span><span class="py">Validity</span><span 
class="p">:</span><span class="w"> </span><span class="nc">Age</span><span class="err">&#39;</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">ROUND</span><span class="p">(</span><span class="py">100</span><span class="mf">.0</span><span class="w"> </span><span class="err">*</span><span class="w"> </span><span class="p">(</span><span class="py">total</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="py">invalid_age</span><span class="p">)</span><span class="w"> </span><span class="err">/</span><span class="w"> </span><span class="py">total</span><span class="p">,</span><span class="w"> </span><span class="py">2</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">FROM</span><span class="w"> </span><span class="py">person_metrics</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="kc">UNION</span><span class="w"> </span><span class="py">ALL</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">SELECT</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="err">&#39;</span><span class="py">Validity</span><span class="p">:</span><span class="w"> </span><span class="nc">Email</span><span class="err">&#39;</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">ROUND</span><span class="p">(</span><span class="py">100</span><span class="mf">.0</span><span class="w"> </span><span class="err">*</span><span class="w"> </span><span class="p">(</span><span class="py">has_email</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span 
class="py">invalid_email</span><span class="p">)</span><span class="w"> </span><span class="err">/</span><span class="w"> </span><span class="py">has_email</span><span class="p">,</span><span class="w"> </span><span class="py">2</span><span class="p">)</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">FROM</span><span class="w"> </span><span class="py">person_metrics</span><span class="err">;</span><span class="w"> </span></span></span></code></pre></div> <h4 id="automated-quality-checks" class="position-relative d-flex align-items-center group"> <span>Automated Quality Checks</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="automated-quality-checks" aria-haspopup="dialog" aria-label="Share link: Automated Quality Checks"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl"><span class="c1"># Python - automated quality monitoring</span> </span></span><span class="line"><span class="cl"><span class="kn">from</span> <span class="nn">dataclasses</span> <span class="kn">import</span> <span class="n">dataclass</span> </span></span><span class="line"><span class="cl"><span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span> </span></span><span class="line"><span class="cl"><span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">List</span> </span></span><span class="line"><span class="cl"><span class="kn">import</span> <span class="nn">asyncio</span> </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl"><span class="nd">@dataclass</span> </span></span><span class="line"><span class="cl"><span class="k">class</span> <span
class="nc">QualityCheck</span><span class="p">:</span> </span></span><span class="line"><span class="cl"> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span> </span></span><span class="line"><span class="cl"> <span class="n">query</span><span class="p">:</span> <span class="nb">str</span> </span></span><span class="line"><span class="cl"> <span class="n">threshold</span><span class="p">:</span> <span class="nb">float</span> <span class="c1"># Acceptable quality score (0-100)</span> </span></span><span class="line"><span class="cl"> <span class="n">critical</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span> </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl"><span class="k">class</span> <span class="nc">QualityMonitor</span><span class="p">:</span> </span></span><span class="line"><span class="cl"> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">client</span><span class="p">:</span> <span class="n">Client</span><span class="p">):</span> </span></span><span class="line"><span class="cl"> <span class="bp">self</span><span class="o">.</span><span class="n">client</span> <span class="o">=</span> <span class="n">client</span> </span></span><span class="line"><span class="cl"> <span class="bp">self</span><span class="o">.</span><span class="n">checks</span> <span class="o">=</span> <span class="p">[</span> </span></span><span class="line"><span class="cl"> <span class="n">QualityCheck</span><span class="p">(</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;Email Completeness&#34;</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;MATCH (p:Person) RETURN COUNT(p.email) * 100.0 / COUNT(*) AS score&#34;</span><span class="p">,</span> 
</span></span><span class="line"><span class="cl"> <span class="n">threshold</span><span class="o">=</span><span class="mf">95.0</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="n">critical</span><span class="o">=</span><span class="kc">True</span> </span></span><span class="line"><span class="cl"> <span class="p">),</span> </span></span><span class="line"><span class="cl"> <span class="n">QualityCheck</span><span class="p">(</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;Valid Email Format&#34;</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;&#34;&#34;MATCH (p:Person) </span></span></span><span class="line"><span class="cl"><span class="s2"> WHERE p.email IS NOT NULL </span></span></span><span class="line"><span class="cl"><span class="s2"> RETURN SUM(CASE WHEN p.email ~ &#39;^[^@]+@[^@]+\.[^@]+$&#39; THEN 1 ELSE 0 END) </span></span></span><span class="line"><span class="cl"><span class="s2"> * 100.0 / COUNT(*) AS score&#34;&#34;&#34;</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="n">threshold</span><span class="o">=</span><span class="mf">99.0</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="n">critical</span><span class="o">=</span><span class="kc">True</span> </span></span><span class="line"><span class="cl"> <span class="p">),</span> </span></span><span class="line"><span class="cl"> <span class="n">QualityCheck</span><span class="p">(</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;Age Validity&#34;</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;&#34;&#34;MATCH (p:Person) </span></span></span><span class="line"><span class="cl"><span class="s2"> WHERE p.age IS NOT NULL </span></span></span><span class="line"><span class="cl"><span class="s2"> 
RETURN SUM(CASE WHEN p.age &gt;= 0 AND p.age &lt;= 150 THEN 1 ELSE 0 END) </span></span></span><span class="line"><span class="cl"><span class="s2"> * 100.0 / COUNT(*) AS score&#34;&#34;&#34;</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="n">threshold</span><span class="o">=</span><span class="mf">99.5</span> </span></span><span class="line"><span class="cl"> <span class="p">),</span> </span></span><span class="line"><span class="cl"> <span class="p">]</span> </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl"> <span class="k">async</span> <span class="k">def</span> <span class="nf">run_check</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">check</span><span class="p">:</span> <span class="n">QualityCheck</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">dict</span><span class="p">:</span> </span></span><span class="line"><span class="cl"> <span class="k">async</span> <span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">client</span><span class="o">.</span><span class="n">connection</span><span class="p">()</span> <span class="k">as</span> <span class="n">conn</span><span class="p">:</span> </span></span><span class="line"><span class="cl"> <span class="n">result</span> <span class="o">=</span> <span class="k">await</span> <span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">check</span><span class="o">.</span><span class="n">query</span><span class="p">)</span> </span></span><span class="line"><span class="cl"> <span class="n">score</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">bindings</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="s1">&#39;score&#39;</span><span class="p">]</span> </span></span><span class="line"><span class="cl"> <span class="n">passed</span> <span class="o">=</span> <span class="n">score</span> <span class="o">&gt;=</span> <span class="n">check</span><span class="o">.</span><span class="n">threshold</span> </span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="k">return</span> <span class="p">{</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;name&#34;</span><span class="p">:</span> <span class="n">check</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;score&#34;</span><span class="p">:</span> <span class="n">score</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;threshold&#34;</span><span class="p">:</span> <span class="n">check</span><span class="o">.</span><span class="n">threshold</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;passed&#34;</span><span class="p">:</span> <span class="n">passed</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;critical&#34;</span><span class="p">:</span> <span class="n">check</span><span class="o">.</span><span class="n">critical</span> </span></span><span class="line"><span class="cl"> <span class="p">}</span> </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl"> <span class="k">async</span> <span class="k">def</span> <span class="nf">run_all_checks</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">dict</span><span class="p">:</span> </span></span><span class="line"><span class="cl"> <span class="n">results</span> <span class="o">=</span> <span class="k">await</span> <span class="n">asyncio</span><span class="o">.</span><span class="n">gather</span><span class="p">(</span><span class="o">*</span><span class="p">[</span> </span></span><span class="line"><span class="cl"> <span class="bp">self</span><span class="o">.</span><span class="n">run_check</span><span class="p">(</span><span 
class="n">check</span><span class="p">)</span> <span class="k">for</span> <span class="n">check</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">checks</span> </span></span><span class="line"><span class="cl"> <span class="p">])</span> </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl"> <span class="n">failed_critical</span> <span class="o">=</span> <span class="p">[</span><span class="n">r</span> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">results</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">r</span><span class="p">[</span><span class="s1">&#39;passed&#39;</span><span class="p">]</span> <span class="ow">and</span> <span class="n">r</span><span class="p">[</span><span class="s1">&#39;critical&#39;</span><span class="p">]]</span> </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl"> <span class="k">return</span> <span class="p">{</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;timestamp&#34;</span><span class="p">:</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span><span class="o">.</span><span class="n">isoformat</span><span class="p">(),</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;checks&#34;</span><span class="p">:</span> <span class="n">results</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;overall_passed&#34;</span><span class="p">:</span> <span class="nb">len</span><span class="p">(</span><span class="n">failed_critical</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;failed_critical_count&#34;</span><span 
class="p">:</span> <span class="nb">len</span><span class="p">(</span><span class="n">failed_critical</span><span class="p">)</span> </span></span><span class="line"><span class="cl"> <span class="p">}</span> </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl"><span class="c1"># Usage</span> </span></span><span class="line"><span class="cl"><span class="k">async</span> <span class="k">def</span> <span class="nf">main</span><span class="p">():</span> </span></span><span class="line"><span class="cl"> <span class="n">client</span> <span class="o">=</span> <span class="n">Client</span><span class="p">(</span><span class="n">host</span><span class="o">=</span><span class="s2">&#34;localhost&#34;</span><span class="p">,</span> <span class="n">port</span><span class="o">=</span><span class="mi">3141</span><span class="p">)</span> </span></span><span class="line"><span class="cl"> <span class="k">async</span> <span class="k">with</span> <span class="n">client</span><span class="o">.</span><span class="n">connection</span><span class="p">()</span> <span class="k">as</span> <span class="n">conn</span><span class="p">:</span> </span></span><span class="line"><span class="cl"> <span class="n">monitor</span> <span class="o">=</span> <span class="n">QualityMonitor</span><span class="p">(</span><span class="n">client</span><span class="p">)</span> </span></span><span class="line"><span class="cl"> <span class="n">report</span> <span class="o">=</span> <span class="k">await</span> <span class="n">monitor</span><span class="o">.</span><span class="n">run_all_checks</span><span class="p">()</span> </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl"> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&#34;Quality Report - </span><span class="si">{</span><span class="n">report</span><span class="p">[</span><span 
class="s1">&#39;timestamp&#39;</span><span class="p">]</span><span class="si">}</span><span class="s2">&#34;</span><span class="p">)</span> </span></span><span class="line"><span class="cl"> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&#34;Overall Status: </span><span class="si">{</span><span class="s1">&#39;PASS&#39;</span> <span class="k">if</span> <span class="n">report</span><span class="p">[</span><span class="s1">&#39;overall_passed&#39;</span><span class="p">]</span> <span class="k">else</span> <span class="s1">&#39;FAIL&#39;</span><span class="si">}</span><span class="s2">&#34;</span><span class="p">)</span> </span></span><span class="line"><span class="cl"> </span></span><span class="line"><span class="cl"> <span class="k">for</span> <span class="n">check</span> <span class="ow">in</span> <span class="n">report</span><span class="p">[</span><span class="s1">&#39;checks&#39;</span><span class="p">]:</span> </span></span><span class="line"><span class="cl"> <span class="n">status</span> <span class="o">=</span> <span class="s1">&#39;✓&#39;</span> <span class="k">if</span> <span class="n">check</span><span class="p">[</span><span class="s1">&#39;passed&#39;</span><span class="p">]</span> <span class="k">else</span> <span class="s1">&#39;✗&#39;</span> </span></span><span class="line"><span class="cl"> <span class="n">critical</span> <span class="o">=</span> <span class="s1">&#39; [CRITICAL]&#39;</span> <span class="k">if</span> <span class="n">check</span><span class="p">[</span><span class="s1">&#39;critical&#39;</span><span class="p">]</span> <span class="k">else</span> <span class="s1">&#39;&#39;</span> </span></span><span class="line"><span class="cl"> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&#34;</span><span class="si">{</span><span class="n">status</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span 
class="n">check</span><span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">]</span><span class="si">}</span><span class="s2">: </span><span class="si">{</span><span class="n">check</span><span class="p">[</span><span class="s1">&#39;score&#39;</span><span class="p">]</span><span class="si">:</span><span class="s2">.2f</span><span class="si">}</span><span class="s2">% &#34;</span> </span></span><span class="line"><span class="cl"> <span class="sa">f</span><span class="s2">&#34;(threshold: </span><span class="si">{</span><span class="n">check</span><span class="p">[</span><span class="s1">&#39;threshold&#39;</span><span class="p">]</span><span class="si">}</span><span class="s2">%)</span><span class="si">{</span><span class="n">critical</span><span class="si">}</span><span class="s2">&#34;</span><span class="p">)</span> </span></span></code></pre></div> <h3 id="data-governance" class="position-relative d-flex align-items-center group"> <span>Data Governance</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="data-governance" aria-haspopup="dialog" aria-label="Share link: Data Governance"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3><p>Establish policies and procedures for maintaining data quality.</p> <h4 id="quality-rules-documentation" class="position-relative d-flex align-items-center group"> <span>Quality Rules Documentation</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="quality-rules-documentation" aria-haspopup="dialog" aria-label="Share link: Quality Rules Documentation"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span 
class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl"><span class="c1"># Define quality rules as code</span> </span></span><span class="line"><span class="cl"><span class="n">QUALITY_RULES</span> <span class="o">=</span> <span class="p">{</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;person_email&#34;</span><span class="p">:</span> <span class="p">{</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;description&#34;</span><span class="p">:</span> <span class="s2">&#34;Every person must have a valid email address&#34;</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;check&#34;</span><span class="p">:</span> <span class="s2">&#34;p.email IS NOT NULL AND p.email ~ &#39;^[^@]+@[^@]+</span><span class="se">\\</span><span class="s2">.[^@]+$&#39;&#34;</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;severity&#34;</span><span class="p">:</span> <span class="s2">&#34;critical&#34;</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;owner&#34;</span><span class="p">:</span> <span class="s2">&#34;data-quality@example.com&#34;</span> </span></span><span class="line"><span class="cl"> <span class="p">},</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;order_total&#34;</span><span class="p">:</span> <span class="p">{</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;description&#34;</span><span class="p">:</span> <span class="s2">&#34;Order total must match sum of line items&#34;</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;check&#34;</span><span class="p">:</span> <span class="s2">&#34;o.total = (MATCH 
(o)-[:HAS_ITEM]-&gt;(i) RETURN SUM(i.price * i.quantity))&#34;</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;severity&#34;</span><span class="p">:</span> <span class="s2">&#34;high&#34;</span><span class="p">,</span> </span></span><span class="line"><span class="cl"> <span class="s2">&#34;owner&#34;</span><span class="p">:</span> <span class="s2">&#34;finance@example.com&#34;</span> </span></span><span class="line"><span class="cl"> <span class="p">},</span> </span></span><span class="line"><span class="cl"><span class="p">}</span> </span></span></code></pre></div> <h4 id="audit-trail" class="position-relative d-flex align-items-center group"> <span>Audit Trail</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="audit-trail" aria-haspopup="dialog" aria-label="Share link: Audit Trail"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Track</span><span class="w"> </span><span class="py">data</span><span class="w"> </span><span class="py">quality</span><span class="w"> </span><span class="py">issues</span><span class="w"> </span><span class="py">over</span><span class="w"> </span><span class="py">time</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">CREATE</span><span class="w"> </span><span class="py">NODE</span><span class="w"> </span><span class="py">TYPE</span><span class="w"> </span><span class="py">DataQualityIssue</span><span class="w"> </span><span class="p">(</span><span class="w"> </span></span></span><span 
class="line"><span class="cl"><span class="w"> </span><span class="py">id</span><span class="w"> </span><span class="py">STRING</span><span class="w"> </span><span class="py">DEFAULT</span><span class="w"> </span><span class="py">gen_random_uuid</span><span class="p">(),</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">issue_type</span><span class="w"> </span><span class="py">STRING</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">severity</span><span class="w"> </span><span class="py">STRING</span><span class="w"> </span><span class="py">CHECK</span><span class="w"> </span><span class="p">(</span><span class="py">severity</span><span class="w"> </span><span class="py">IN</span><span class="w"> </span><span class="p">(</span><span class="err">&#39;</span><span class="py">low</span><span class="err">&#39;</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">medium</span><span class="err">&#39;</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">high</span><span class="err">&#39;</span><span class="p">,</span><span class="w"> </span><span class="err">&#39;</span><span class="py">critical</span><span class="err">&#39;</span><span class="p">)),</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">detected_at</span><span class="w"> </span><span class="py">TIMESTAMP</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="w"> </span><span class="py">DEFAULT</span><span class="w"> </span><span class="py">NOW</span><span class="p">(),</span><span class="w"> 
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">resolved_at</span><span class="w"> </span><span class="py">TIMESTAMP</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">entity_type</span><span class="w"> </span><span class="py">STRING</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">entity_id</span><span class="w"> </span><span class="py">STRING</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">description</span><span class="w"> </span><span class="py">TEXT</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">resolution</span><span class="w"> </span><span class="py">TEXT</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="p">)</span><span class="err">;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Log</span><span class="w"> </span><span class="py">quality</span><span class="w"> </span><span class="py">issue</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">INSERT</span><span class="w"> </span><span 
class="p">(</span><span class="py">issue</span><span class="p">:</span><span class="nc">DataQualityIssue</span><span class="w"> </span><span class="p">{</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">issue_type</span><span class="p">:</span><span class="w"> </span><span class="err">&#39;</span><span class="nc">invalid_email</span><span class="err">&#39;</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">severity</span><span class="p">:</span><span class="w"> </span><span class="err">&#39;</span><span class="nc">high</span><span class="err">&#39;</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">entity_type</span><span class="p">:</span><span class="w"> </span><span class="err">&#39;</span><span class="nc">Person</span><span class="err">&#39;</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">entity_id</span><span class="p">:</span><span class="w"> </span><span class="err">&#39;</span><span class="nc">person</span><span class="err">-</span><span class="py">123</span><span class="err">&#39;</span><span class="p">,</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">description</span><span class="p">:</span><span class="w"> </span><span class="err">&#39;</span><span class="nc">Email</span><span class="w"> </span><span class="py">format</span><span class="w"> </span><span class="py">validation</span><span class="w"> </span><span class="py">failed</span><span class="err">&#39;</span><span class="w"> </span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="p">})</span><span class="err">;</span><span class="w"> 
</span></span></span></code></pre></div> <h3 id="best-practices" class="position-relative d-flex align-items-center group"> <span>Best Practices</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="best-practices" aria-haspopup="dialog" aria-label="Share link: Best Practices"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3><ol> <li><strong>Define Quality Standards Early</strong>: Establish quality metrics during schema design</li> <li><strong>Automate Quality Checks</strong>: Run validation on every data import and update</li> <li><strong>Profile Regularly</strong>: Schedule weekly or daily data profiling jobs</li> <li><strong>Track Metrics Over Time</strong>: Monitor quality trends to detect degradation</li> <li><strong>Document Quality Rules</strong>: Maintain a catalog of what constitutes &ldquo;quality&rdquo;</li> <li><strong>Assign Ownership</strong>: Each data domain should have a quality owner</li> <li><strong>Prevent at Source</strong>: Validate data at input rather than fixing later</li> <li><strong>Clean Incrementally</strong>: Don&rsquo;t wait for major cleansing projects</li> <li><strong>Version Quality Rules</strong>: Track changes to quality definitions</li> <li><strong>Alert on Critical Issues</strong>: Notify stakeholders when quality drops below thresholds</li> </ol> <h3 id="common-quality-anti-patterns" class="position-relative d-flex align-items-center group"> <span>Common Quality Anti-Patterns</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="common-quality-anti-patterns" aria-haspopup="dialog" aria-label="Share link: Common Quality Anti-Patterns"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" 
aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3><p><strong>Accepting &ldquo;good enough&rdquo; data</strong>: Quality degrades exponentially over time</p> <p><strong>Manual cleansing</strong>: Doesn&rsquo;t scale, introduces human error</p> <p><strong>Ignoring outliers</strong>: Outliers often indicate deeper quality issues</p> <p><strong>No monitoring</strong>: Quality issues go unnoticed until a crisis</p> <p><strong>Fixing symptoms, not causes</strong>: Cleanse the source data, not just the database</p> <h3 id="related-topics" class="position-relative d-flex align-items-center group"> <span>Related Topics</span> <button type="button" class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1" data-share-target="related-topics" aria-haspopup="dialog" aria-label="Share link: Related Topics"> <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i> <span class="visually-hidden">Share link</span> </button> </h3><ul> <li><a href="/tags/validation" >Validation</a> - Data validation techniques</li> <li><a href="/tags/constraints" >Constraints</a> - Schema constraint enforcement</li> <li><a href="/tags/etl" >ETL</a> - Data integration quality</li> <li><a href="/tags/monitoring" >Monitoring</a> - System monitoring</li> <li><a href="/tags/governance/" >Data Governance</a> - Governance policies</li> <li><a href="/tags/testing" >Testing</a> - Quality testing strategies</li> </ul>

Related Articles

No articles found with this tag yet.

Back to Home