<!-- CANARY: REQ=REQ-DOCS-001; FEATURE="Docs"; ASPECT=Documentation; STATUS=TESTED; OWNER=docs; UPDATED=2026-01-15 -->
<h2 id="data-quality-management" class="position-relative d-flex align-items-center group">
  <span>Data Quality Management</span>
  <button type="button"
          class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
          data-share-target="data-quality-management"
          aria-haspopup="dialog"
          aria-label="Share link: Data Quality Management">
    <i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
    <span class="visually-hidden">Share link</span>
  </button>
</h2>
<!--
  Share dialog used by every ".h-share" heading button on the page.
  The inline script on this page looks elements up by these hooks, so keep them stable:
  #headingShareModal, #headingShareInput, .hsm-copy, .hsm-close,
  #share-twitter, #share-linkedin, #share-facebook.
  The dialog starts hidden; the script removes the "hidden" attribute to show it.
-->
<div id="headingShareModal" class="heading-share-modal" role="dialog" aria-modal="true" aria-labelledby="headingShareTitle" hidden>
  <div class="hsm-dialog" role="document">
    <div class="hsm-header">
      <h2 id="headingShareTitle" class="h6 mb-0 fw-bold">Share this section</h2>
      <!-- Icon is decorative: the accessible name comes from aria-label. -->
      <button type="button" class="hsm-close" aria-label="Close">
        <i class="fa-solid fa-xmark" aria-hidden="true"></i>
      </button>
    </div>
    <div class="hsm-body">
      <label for="headingShareInput" class="form-label small text-muted mb-1 text-uppercase fw-bold" style="font-size: 0.7rem; letter-spacing: 0.5px;">Permalink</label>
      <div class="input-group mb-4 hsm-url-group">
        <!-- Native "readonly" already exposes the read-only state; no aria-readonly needed. -->
        <input id="headingShareInput" type="text" class="form-control font-monospace" readonly style="font-size: 0.85rem;">
        <!-- The script swaps the class of the first <i> in this button to show copy feedback. -->
        <button class="btn btn-primary hsm-copy" type="button" aria-label="Copy" title="Copy">
          <i class="fa-duotone fa-clipboard" aria-hidden="true"></i>
        </button>
      </div>
      <div class="small fw-bold mb-2 text-muted text-uppercase" style="font-size: 0.7rem; letter-spacing: 0.5px;">Share via</div>
      <!-- The href of each link is filled in by the script right before the dialog is shown. -->
      <div class="hsm-share-grid">
        <a id="share-twitter" class="btn btn-outline-secondary w-100" target="_blank" rel="noopener noreferrer">
          <i class="fa-brands fa-twitter me-2" aria-hidden="true"></i>Twitter<span class="visually-hidden"> (opens in new tab)</span>
        </a>
        <a id="share-linkedin" class="btn btn-outline-secondary w-100" target="_blank" rel="noopener noreferrer">
          <i class="fa-brands fa-linkedin me-2" aria-hidden="true"></i>LinkedIn<span class="visually-hidden"> (opens in new tab)</span>
        </a>
        <a id="share-facebook" class="btn btn-outline-secondary w-100" target="_blank" rel="noopener noreferrer">
          <i class="fa-brands fa-facebook me-2" aria-hidden="true"></i>Facebook<span class="visually-hidden"> (opens in new tab)</span>
        </a>
      </div>
    </div>
  </div>
</div>
<style>
  /* ---- Backdrop: full-viewport overlay that centres the dialog ---- */
  .heading-share-modal {
    position: fixed;
    inset: 0;
    display: flex;
    justify-content: center;
    align-items: center;
    background: rgba(0, 0, 0, 0.6);
    z-index: 1050;
    padding: 1rem;
    -webkit-backdrop-filter: blur(4px);
    backdrop-filter: blur(4px);
  }
  /* display:flex above would otherwise override the UA rule for [hidden]. */
  .heading-share-modal[hidden] { display: none !important; }

  /* ---- Dialog panel ---- */
  .hsm-dialog {
    max-width: 420px;
    width: 100%;
    background: var(--bs-body-bg, #fff);
    color: var(--bs-body-color, #212529);
    border: 1px solid var(--bs-border-color, rgba(0,0,0,0.1));
    border-radius: 1rem;
    box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.25);
    overflow: hidden;
    animation: hsm-fade-in 0.2s ease-out;
  }
  @keyframes hsm-fade-in {
    from { opacity: 0; transform: scale(0.95); }
    to { opacity: 1; transform: scale(1); }
  }
  [data-bs-theme="dark"] .hsm-dialog {
    background: #1e293b;
    border-color: rgba(255,255,255,0.1);
    color: #f8f9fa;
  }

  /* ---- Header row: title on the left, close button on the right ---- */
  .hsm-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    padding: 1rem 1.5rem;
    border-bottom: 1px solid var(--bs-border-color, rgba(0,0,0,0.1));
    background: rgba(0,0,0,0.02);
  }
  [data-bs-theme="dark"] .hsm-header {
    background: rgba(255,255,255,0.02);
    border-color: rgba(255,255,255,0.1);
  }

  /* ---- Close button: dimmed until the user points at or focuses it ---- */
  .hsm-close {
    background: transparent;
    border: none;
    color: inherit;
    opacity: 0.5;
    padding: 0.25rem 0.5rem;
    border-radius: 0.25rem;
    font-size: 1.2rem;
    line-height: 1;
    transition: opacity 0.2s;
  }
  .hsm-close:hover {
    opacity: 1;
  }
  /* Kept as its own rule so a browser without :focus-visible still applies the hover rule.
     Opacity also dims the focus outline, so keyboard focus must restore full opacity. */
  .hsm-close:focus-visible {
    opacity: 1;
  }

  .hsm-body {
    padding: 1.5rem;
  }

  /* ---- Permalink field + copy button, joined into one 42px-high control ---- */
  .hsm-url-group {
    display: flex !important;
    align-items: stretch;
  }
  .hsm-url-group .form-control {
    flex: 1;
    min-width: 0;
    margin: 0;
    background: var(--bs-secondary-bg, #f8f9fa);
    border-color: var(--bs-border-color, #dee2e6);
    border-top-right-radius: 0;
    border-bottom-right-radius: 0;
    height: 42px;
  }
  .hsm-url-group .btn {
    flex: 0 0 auto;
    margin: 0;
    margin-left: -1px; /* overlap the input's right border so the seam is 1px, not 2px */
    border-top-left-radius: 0;
    border-bottom-left-radius: 0;
    height: 42px;
    display: flex;
    align-items: center;
    justify-content: center;
    padding: 0 1.25rem;
    z-index: 2;
  }
  [data-bs-theme="dark"] .hsm-url-group .form-control {
    background: #0f172a;
    border-color: #334155;
    color: #e2e8f0;
  }

  /* ---- "Share via" links, stacked full-width ---- */
  .hsm-share-grid {
    display: flex;
    flex-direction: column;
    gap: 0.5rem;
  }
  .hsm-share-grid .btn {
    display: flex;
    align-items: center;
    justify-content: center;
    font-size: 0.9rem;
    padding: 0.6rem;
    /* Same fallback as the permalink field, for pages where the variable is not defined. */
    border-color: var(--bs-border-color, #dee2e6);
    width: 100%;
  }
  [data-bs-theme="dark"] .hsm-share-grid .btn {
    color: #e2e8f0;
    border-color: #475569;
  }
  [data-bs-theme="dark"] .hsm-share-grid .btn:hover {
    background: #334155;
    border-color: #cbd5e1;
  }

  /* ---- Respect the user's reduced-motion preference ---- */
  @media (prefers-reduced-motion: reduce) {
    .hsm-dialog { animation: none; }
    .hsm-close { transition: none; }
  }
</style>
<script>
(function(){
const modal = document.getElementById('headingShareModal');
if(!modal) return;
const input = modal.querySelector('#headingShareInput');
const copyBtn = modal.querySelector('.hsm-copy');
const twitter = modal.querySelector('#share-twitter');
const linkedin = modal.querySelector('#share-linkedin');
const facebook = modal.querySelector('#share-facebook');
const closeBtn = modal.querySelector('.hsm-close');
let lastFocus=null;
let trapBound=false;
// Permalink for a heading: the page's origin and path with `id` as the fragment.
// The query string of the current URL is not included.
function buildUrl(id){
  const { origin, pathname } = window.location;
  return `${origin}${pathname}#${id}`;
}
// The dialog counts as open whenever it does not carry the `hidden` attribute.
function isOpen(){
  const isHidden = modal.hasAttribute('hidden');
  return !isHidden;
}
// Fill the dialog for heading `id`: put the permalink in the text field and
// point each share link that exists at its network's share URL.
function hydrate(id){
  const permalink = buildUrl(id);
  input.value = permalink;

  const enc = encodeURIComponent(permalink);
  const text = encodeURIComponent(document.title);

  // [anchor element or null, share URL] pairs; missing anchors are skipped.
  const shareLinks = [
    [twitter, `https://twitter.com/intent/tweet?url=${enc}&text=${text}`],
    [linkedin, `https://www.linkedin.com/sharing/share-offsite/?url=${enc}`],
    [facebook, `https://www.facebook.com/sharer/sharer.php?u=${enc}`],
  ];
  for (const [anchor, href] of shareLinks) {
    if (anchor) { anchor.href = href; }
  }
}
// Show the dialog for heading `id`.
function openModal(id){
  // Remember where focus was so closeModal() can put it back.
  lastFocus = document.activeElement;
  hydrate(id);

  const currentlyHidden = modal.hasAttribute('hidden');
  if (currentlyHidden) {
    modal.removeAttribute('hidden');
  }

  // Focus is moved on the next frame rather than synchronously.
  requestAnimationFrame(function(){
    input.focus();
  });
  trapFocus();
}
// Hide the dialog and hand focus back to whatever had it before opening.
// Does nothing when the dialog is already hidden.
function closeModal(){
  if (isOpen()) {
    modal.setAttribute('hidden', '');
    const canRefocus = lastFocus && typeof lastFocus.focus === 'function';
    if (canRefocus) {
      lastFocus.focus();
    }
  }
}
// Copy the permalink via the async Clipboard API. A rejected write, or a
// synchronous throw while calling it, is routed to fallback().
function copyCurrent(){
  const onCopied = () => feedback(true);
  const onRejected = () => fallback();
  try {
    navigator.clipboard.writeText(input.value).then(onCopied, onRejected);
  } catch (err) {
    fallback();
  }
}
// Legacy copy path: select the permalink text and ask the browser to copy the selection.
// document.execCommand('copy') signals failure by RETURNING false (it does not
// have to throw), so the return value decides which feedback icon is shown.
function fallback(){
  input.select();
  let copied = false;
  try {
    copied = document.execCommand('copy') === true;
  } catch (e) {
    copied = false;
  }
  feedback(copied);
}
// Flash a success (ok === true) or error icon on the copy button, then restore
// the button's original icon after 1.8 s.
function feedback(ok){
  if (!copyBtn) return;
  const icon = copyBtn.querySelector('i');
  if (!icon) return;

  // The original icon classes are cached on the button the first time through,
  // so later calls never capture a temporary feedback icon as the "original".
  let prev = copyBtn.getAttribute('data-prev');
  if (!prev) {
    prev = icon.className;
    copyBtn.setAttribute('data-prev', prev);
  }

  icon.className = ok ? 'fa-duotone fa-clipboard-check' : 'fa-duotone fa-circle-exclamation';

  // Cancel a still-pending restore from an earlier click; otherwise it would
  // revert this newer feedback icon before its own 1.8 s have elapsed.
  clearTimeout(feedback.resetTimer);
  feedback.resetTimer = setTimeout(() => { icon.className = prev; }, 1800);
}
// Click handler for a share button: cancels the default action and opens the
// dialog for the id in the `data-share-target` attribute of the element the
// listener is attached to (e.currentTarget). Does nothing if the attribute is empty or missing.
function handleShareClick(e){
  e.preventDefault();
  const targetId = e.currentTarget.getAttribute('data-share-target');
  if (!targetId) return;
  openModal(targetId);
}
// Attach handleShareClick to every `.h-share` button that has not been wired yet.
// Wired buttons are marked with data-h-share-bound="1", so repeated calls are safe.
function bindShareButtons(){
  const triggers = document.querySelectorAll('.h-share');
  for (const trigger of triggers) {
    if (trigger.dataset.hShareBound) continue;
    trigger.addEventListener('click', handleShareClick);
    trigger.dataset.hShareBound = '1';
  }
}
// Wire the buttons that exist right now, then once more after the document has
// finished parsing (or on the next frame if parsing is already done).
bindShareButtons();
if (document.readyState === 'loading') {
  document.addEventListener('DOMContentLoaded', bindShareButtons);
} else {
  requestAnimationFrame(bindShareButtons);
}

// Capture-phase safety net for `.h-share` buttons that were never bound directly.
// The button is resolved from the click target here: in a document-level listener
// e.currentTarget is `document`, which has no getAttribute(), so delegating to
// handleShareClick (which reads e.currentTarget) would throw.
document.addEventListener('click', function(e){
  const shareBtn = e.target.closest && e.target.closest('.h-share');
  if (!shareBtn || shareBtn.dataset.hShareBound) return;
  e.preventDefault();
  const id = shareBtn.getAttribute('data-share-target');
  if (id) openModal(id);
}, true);

// Delegated clicks for the dialog itself.
document.addEventListener('click', e => {
  // A click whose target is the overlay element itself (not a descendant) dismisses the dialog.
  if (e.target === modal) closeModal();
  // Close button, or anything inside it.
  if (e.target.closest && e.target.closest('.hsm-close')) {
    e.preventDefault();
    closeModal();
  }
  // Copy button, or anything inside it.
  if (copyBtn && (e.target === copyBtn || (e.target.closest && e.target.closest('.hsm-copy')))) {
    e.preventDefault();
    copyCurrent();
  }
});

// Escape closes the dialog while it is open.
document.addEventListener('keydown', e => {
  if (e.key === 'Escape' && isOpen()) closeModal();
});
// Keep Tab / Shift+Tab cycling inside the dialog. The keydown listener is
// attached only once, no matter how many times this function is called.
function trapFocus(){
  if (trapBound) return;
  trapBound = true;

  const focusableSelector = 'a[href],button,input,textarea,select,[tabindex]:not([tabindex="-1"])';

  modal.addEventListener('keydown', function(evt){
    if (evt.key !== 'Tab' || !isOpen()) return;

    // Re-queried on every Tab press; elements with a `disabled` attribute are skipped.
    const stops = Array.from(modal.querySelectorAll(focusableSelector))
      .filter(el => !el.hasAttribute('disabled'));
    if (stops.length === 0) return;

    const first = stops[0];
    const last = stops[stops.length - 1];
    const current = document.activeElement;

    if (evt.shiftKey) {
      // Shift+Tab on the first stop wraps around to the last one.
      if (current === first) {
        evt.preventDefault();
        last.focus();
      }
    } else if (current === last) {
      // Tab on the last stop wraps around to the first one.
      evt.preventDefault();
      first.focus();
    }
  });
}
// Direct listener on the close button, when the dialog has one.
if (closeBtn) {
  closeBtn.addEventListener('click', function(evt){
    evt.preventDefault();
    closeModal();
  });
}
})();
</script><p>Data quality management in Geode encompasses the processes, techniques, and tools that ensure your graph database contains accurate, complete, consistent, and timely data. High-quality data is essential for reliable analytics, trustworthy decision-making, and operational efficiency. Poor data quality leads to incorrect insights, failed transactions, and loss of user trust.</p>
<h3 id="the-six-dimensions-of-data-quality" class="position-relative d-flex align-items-center group">
<span>The Six Dimensions of Data Quality</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="the-six-dimensions-of-data-quality"
aria-haspopup="dialog"
aria-label="Share link: The Six Dimensions of Data Quality">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3><p>Understanding these six fundamental dimensions helps frame your quality management strategy:</p>
<h4 id="1-accuracy" class="position-relative d-flex align-items-center group">
<span>1. Accuracy</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="1-accuracy"
aria-haspopup="dialog"
aria-label="Share link: 1. Accuracy">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Accuracy measures whether data correctly represents the real-world entities and relationships it describes. Inaccurate data leads to wrong conclusions and faulty business decisions.</p>
<p><strong>Examples</strong>:</p>
<ul>
<li>Person’s age matches their birth date</li>
<li>Product prices reflect current market rates</li>
<li>Addresses correspond to real locations</li>
</ul>
<p><strong>Validation</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Verify</span><span class="w"> </span><span class="py">age</span><span class="w"> </span><span class="py">matches</span><span class="w"> </span><span class="py">birth</span><span class="w"> </span><span class="py">date</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="w"> </span><span class="py">IS</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">date_of_birth</span><span class="w"> </span><span class="py">IS</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="w"> </span><span class="p">!=</span><span class="w"> </span><span class="py">EXTRACT</span><span class="p">(</span><span class="py">YEAR</span><span class="w"> </span><span class="py">FROM</span><span class="w"> </span><span class="py">AGE</span><span class="p">(</span><span class="py">CURRENT_DATE</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">date_of_birth</span><span class="p">))</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">id</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">date_of_birth</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">age</span><span class="w"> </span><span class="py">mismatch</span><span class="err">'</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">issue</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h4 id="2-completeness" class="position-relative d-flex align-items-center group">
<span>2. Completeness</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="2-completeness"
aria-haspopup="dialog"
aria-label="Share link: 2. Completeness">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Completeness measures whether all required data is present. Missing critical data prevents proper graph traversals and analytics.</p>
<p><strong>Examples</strong>:</p>
<ul>
<li>All customers have email addresses</li>
<li>Products have prices and SKUs</li>
<li>Employees have hire dates</li>
</ul>
<p><strong>Validation</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Find</span><span class="w"> </span><span class="py">incomplete</span><span class="w"> </span><span class="py">records</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="w"> </span><span class="py">IS</span><span class="w"> </span><span class="py">NULL</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">OR</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">name</span><span class="w"> </span><span class="py">IS</span><span class="w"> </span><span class="py">NULL</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">OR</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">created_at</span><span class="w"> </span><span class="py">IS</span><span class="w"> </span><span class="py">NULL</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">id</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">missing</span><span class="w"> </span><span class="py">required</span><span class="w"> </span><span class="py">fields</span><span class="err">'</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">issue</span><span class="err">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Completeness</span><span class="w"> </span><span class="py">percentage</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">COUNT</span><span class="p">(</span><span class="err">*</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">total</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">COUNT</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">phone</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">with_phone</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">ROUND</span><span class="p">(</span><span class="py">COUNT</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">phone</span><span class="p">)</span><span class="w"> </span><span class="err">*</span><span class="w"> </span><span class="py">100</span><span class="mf">.0</span><span class="w"> </span><span class="err">/</span><span class="w"> </span><span class="py">COUNT</span><span class="p">(</span><span class="err">*</span><span class="p">),</span><span class="w"> </span><span class="py">2</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">phone_completeness_pct</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h4 id="3-consistency" class="position-relative d-flex align-items-center group">
<span>3. Consistency</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="3-consistency"
aria-haspopup="dialog"
aria-label="Share link: 3. Consistency">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Consistency ensures data is uniform across the system and doesn’t contradict itself. Inconsistent data creates confusion and unreliable queries.</p>
<p><strong>Examples</strong>:</p>
<ul>
<li>Same email format across all records</li>
<li>Consistent date formats</li>
<li>Matching foreign key references</li>
</ul>
<p><strong>Validation</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Check</span><span class="w"> </span><span class="py">for</span><span class="w"> </span><span class="py">date</span><span class="w"> </span><span class="py">inconsistencies</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">o</span><span class="p">:</span><span class="nc">Order</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">o</span><span class="err">.</span><span class="py">shipped_date</span><span class="w"> </span><span class="err"><</span><span class="w"> </span><span class="py">o</span><span class="err">.</span><span class="py">created_date</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">OR</span><span class="w"> </span><span class="py">o</span><span class="err">.</span><span class="py">delivered_date</span><span class="w"> </span><span class="err"><</span><span class="w"> </span><span class="py">o</span><span class="err">.</span><span class="py">shipped_date</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">o</span><span class="err">.</span><span class="py">id</span><span class="p">,</span><span class="w"> </span><span class="py">o</span><span class="err">.</span><span class="py">created_date</span><span class="p">,</span><span class="w"> </span><span class="py">o</span><span class="err">.</span><span class="py">shipped_date</span><span class="p">,</span><span class="w"> </span><span class="py">o</span><span class="err">.</span><span class="py">delivered_date</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">date</span><span class="w"> </span><span class="py">sequence</span><span class="w"> </span><span class="py">error</span><span class="err">'</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">issue</span><span class="err">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Find</span><span class="w"> </span><span class="py">orphaned</span><span class="w"> </span><span class="py">relationships</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">e</span><span class="p">)</span><span class="err">-</span><span class="p">[</span><span class="py">r</span><span class="p">:</span><span class="nc">WORKS_FOR</span><span class="p">]</span><span class="err">-></span><span class="p">(</span><span class="py">c</span><span class="p">:</span><span class="nc">Company</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">EXISTS</span><span class="p">((</span><span class="py">e</span><span class="p">:</span><span class="nc">Employee</span><span class="p">))</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">e</span><span class="p">,</span><span class="w"> </span><span class="py">r</span><span class="p">,</span><span class="w"> </span><span class="py">c</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">orphaned</span><span class="w"> </span><span class="py">relationship</span><span class="err">'</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">issue</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h4 id="4-timeliness" class="position-relative d-flex align-items-center group">
<span>4. Timeliness</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="4-timeliness"
aria-haspopup="dialog"
aria-label="Share link: 4. Timeliness">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Timeliness measures whether data is up-to-date and available when needed. Stale data leads to decisions based on outdated information.</p>
<p><strong>Examples</strong>:</p>
<ul>
<li>Inventory levels updated in real time</li>
<li>User profiles reflect recent changes</li>
<li>Analytics dashboards show current data</li>
</ul>
<p><strong>Validation</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Find</span><span class="w"> </span><span class="py">stale</span><span class="w"> </span><span class="py">records</span><span class="w"> </span><span class="p">(</span><span class="py">not</span><span class="w"> </span><span class="py">updated</span><span class="w"> </span><span class="py">in</span><span class="w"> </span><span class="py">30</span><span class="w"> </span><span class="py">days</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Product</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">updated_at</span><span class="w"> </span><span class="err"><</span><span class="w"> </span><span class="p">(</span><span class="py">NOW</span><span class="p">()</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="py">INTERVAL</span><span class="w"> </span><span class="err">'</span><span class="py">30</span><span class="w"> </span><span class="py">days</span><span class="err">'</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">sku</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">name</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">updated_at</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">stale</span><span class="w"> </span><span class="py">data</span><span class="err">'</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">issue</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h4 id="5-validity" class="position-relative d-flex align-items-center group">
<span>5. Validity</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="5-validity"
aria-haspopup="dialog"
aria-label="Share link: 5. Validity">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Validity ensures data conforms to defined formats, ranges, and business rules. Invalid data fails to meet schema constraints.</p>
<p><strong>Examples</strong>:</p>
<ul>
<li>Email addresses match email format</li>
<li>Ages within reasonable range (0-150)</li>
<li>Dates are valid calendar dates</li>
</ul>
<p><strong>Validation</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Find</span><span class="w"> </span><span class="py">invalid</span><span class="w"> </span><span class="py">formats</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="w"> </span><span class="py">IS</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="w"> </span><span class="err">~</span><span class="w"> </span><span class="err">'^</span><span class="p">[</span><span class="py">a</span><span class="err">-</span><span class="py">zA</span><span class="err">-</span><span class="py">Z0</span><span class="err">-</span><span class="py">9</span><span class="err">.</span><span class="py">_</span><span class="err">%+-</span><span class="p">]</span><span class="err">+@</span><span class="p">[</span><span class="py">a</span><span class="err">-</span><span class="py">zA</span><span class="err">-</span><span class="py">Z0</span><span class="err">-</span><span class="py">9</span><span class="err">.-</span><span class="p">]</span><span class="err">+\.</span><span class="p">[</span><span class="py">a</span><span class="err">-</span><span class="py">zA</span><span class="err">-</span><span class="py">Z</span><span class="p">]{</span><span class="py">2</span><span class="p">,}</span><span class="err">$'</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">id</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">invalid</span><span class="w"> </span><span class="py">email</span><span class="w"> </span><span class="py">format</span><span class="err">'</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">issue</span><span class="err">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Find</span><span class="w"> </span><span class="py">out</span><span class="err">-</span><span class="py">of</span><span class="err">-</span><span class="py">range</span><span class="w"> </span><span class="py">values</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="w"> </span><span class="err"><</span><span class="w"> </span><span class="py">0</span><span class="w"> </span><span class="py">OR</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="w"> </span><span class="err">></span><span class="w"> </span><span class="py">150</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">id</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">age</span><span class="w"> </span><span class="py">out</span><span class="w"> </span><span class="py">of</span><span class="w"> </span><span class="py">range</span><span class="err">'</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">issue</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h4 id="6-uniqueness" class="position-relative d-flex align-items-center group">
<span>6. Uniqueness</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="6-uniqueness"
aria-haspopup="dialog"
aria-label="Share link: 6. Uniqueness">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><p>Uniqueness ensures that records that should be unique are not duplicated. Duplicates skew analytics and create confusion.</p>
<p><strong>Examples</strong>:</p>
<ul>
<li>No duplicate email addresses</li>
<li>Unique product SKUs</li>
<li>Single canonical record per entity</li>
</ul>
<p><strong>Validation</strong>:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Find</span><span class="w"> </span><span class="py">duplicate</span><span class="w"> </span><span class="py">emails</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">email</span><span class="p">,</span><span class="w"> </span><span class="py">COLLECT</span><span class="p">(</span><span class="py">p</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">persons</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">SIZE</span><span class="p">(</span><span class="py">persons</span><span class="p">)</span><span class="w"> </span><span class="err">></span><span class="w"> </span><span class="py">1</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">email</span><span class="p">,</span><span class="w"> </span><span class="py">SIZE</span><span class="p">(</span><span class="py">persons</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">duplicate_count</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">duplicate</span><span class="w"> </span><span class="py">email</span><span class="err">'</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">issue</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h3 id="data-profiling" class="position-relative d-flex align-items-center group">
<span>Data Profiling</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="data-profiling"
aria-haspopup="dialog"
aria-label="Share link: Data Profiling">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3><p>Data profiling analyzes your database to understand patterns, distributions, and anomalies. Regular profiling helps identify quality issues before they impact operations.</p>
<h4 id="statistical-profiling" class="position-relative d-flex align-items-center group">
<span>Statistical Profiling</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="statistical-profiling"
aria-haspopup="dialog"
aria-label="Share link: Statistical Profiling">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Comprehensive</span><span class="w"> </span><span class="py">data</span><span class="w"> </span><span class="py">profile</span><span class="w"> </span><span class="py">for</span><span class="w"> </span><span class="py">Person</span><span class="w"> </span><span class="py">nodes</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">COUNT</span><span class="p">(</span><span class="err">*</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">total_persons</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">COUNT</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">emails_present</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">COUNT</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">phone</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">phones_present</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">COUNT</span><span class="p">(</span><span class="py">DISTINCT</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">country</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">distinct_countries</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">MIN</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">min_age</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">MAX</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">max_age</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AVG</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">avg_age</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">PERCENTILE_CONT</span><span class="p">(</span><span class="py">0</span><span class="mf">.5</span><span class="p">)</span><span class="w"> </span><span class="py">WITHIN</span><span class="w"> </span><span class="py">GROUP</span><span class="w"> </span><span class="p">(</span><span class="py">ORDER</span><span class="w"> </span><span class="py">BY</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">median_age</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h4 id="distribution-analysis" class="position-relative d-flex align-items-center group">
<span>Distribution Analysis</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="distribution-analysis"
aria-haspopup="dialog"
aria-label="Share link: Distribution Analysis">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl"><span class="c1"># Python - analyze age distribution</span>
</span></span><span class="line"><span class="cl"><span class="kn">from</span> <span class="nn">geode_client</span> <span class="kn">import</span> <span class="n">Client</span>
</span></span><span class="line"><span class="cl"><span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="k">as</span> <span class="nn">plt</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="k">async</span> <span class="k">def</span> <span class="nf">profile_age_distribution</span><span class="p">(</span><span class="n">client</span><span class="p">:</span> <span class="n">Client</span><span class="p">):</span>
</span></span><span class="line"><span class="cl"> <span class="n">result</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="k">await</span> <span class="n">client</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="s2">"""
</span></span></span><span class="line"><span class="cl"><span class="s2"> MATCH (p:Person)
</span></span></span><span class="line"><span class="cl"><span class="s2"> WHERE p.age IS NOT NULL
</span></span></span><span class="line"><span class="cl"><span class="s2"> RETURN
</span></span></span><span class="line"><span class="cl"><span class="s2"> CASE
</span></span></span><span class="line"><span class="cl"><span class="s2"> WHEN p.age < 18 THEN 'Under 18'
</span></span></span><span class="line"><span class="cl"><span class="s2"> WHEN p.age < 30 THEN '18-29'
</span></span></span><span class="line"><span class="cl"><span class="s2"> WHEN p.age < 50 THEN '30-49'
</span></span></span><span class="line"><span class="cl"><span class="s2"> WHEN p.age < 65 THEN '50-64'
</span></span></span><span class="line"><span class="cl"><span class="s2"> ELSE '65+'
</span></span></span><span class="line"><span class="cl"><span class="s2"> END AS age_range,
</span></span></span><span class="line"><span class="cl"><span class="s2"> COUNT(*) AS count
</span></span></span><span class="line"><span class="cl"><span class="s2"> ORDER BY age_range
</span></span></span><span class="line"><span class="cl"><span class="s2"> """</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="n">age_ranges</span> <span class="o">=</span> <span class="p">[</span><span class="n">row</span><span class="p">[</span><span class="s1">'age_range'</span><span class="p">]</span> <span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">result</span><span class="o">.</span><span class="n">bindings</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"> <span class="n">counts</span> <span class="o">=</span> <span class="p">[</span><span class="n">row</span><span class="p">[</span><span class="s1">'count'</span><span class="p">]</span> <span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">result</span><span class="o">.</span><span class="n">bindings</span><span class="p">]</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="c1"># Visualize distribution</span>
</span></span><span class="line"><span class="cl"> <span class="n">plt</span><span class="o">.</span><span class="n">bar</span><span class="p">(</span><span class="n">age_ranges</span><span class="p">,</span> <span class="n">counts</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="n">plt</span><span class="o">.</span><span class="n">title</span><span class="p">(</span><span class="s1">'Age Distribution'</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="n">plt</span><span class="o">.</span><span class="n">xlabel</span><span class="p">(</span><span class="s1">'Age Range'</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="n">plt</span><span class="o">.</span><span class="n">ylabel</span><span class="p">(</span><span class="s1">'Count'</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="n">plt</span><span class="o">.</span><span class="n">savefig</span><span class="p">(</span><span class="s1">'age_distribution.png'</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="c1"># Calculate percentages</span>
</span></span><span class="line"><span class="cl"> <span class="n">total</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">(</span><span class="n">counts</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="k">for</span> <span class="n">age_range</span><span class="p">,</span> <span class="n">count</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">age_ranges</span><span class="p">,</span> <span class="n">counts</span><span class="p">):</span>
</span></span><span class="line"><span class="cl"> <span class="n">pct</span> <span class="o">=</span> <span class="p">(</span><span class="n">count</span> <span class="o">/</span> <span class="n">total</span><span class="p">)</span> <span class="o">*</span> <span class="mi">100</span>
</span></span><span class="line"><span class="cl"> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">age_range</span><span class="si">}</span><span class="s2">: </span><span class="si">{</span><span class="n">count</span><span class="si">}</span><span class="s2"> (</span><span class="si">{</span><span class="n">pct</span><span class="si">:</span><span class="s2">.1f</span><span class="si">}</span><span class="s2">%)"</span><span class="p">)</span>
</span></span></code></pre></div>
<h4 id="outlier-detection" class="position-relative d-flex align-items-center group">
<span>Outlier Detection</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="outlier-detection"
aria-haspopup="dialog"
aria-label="Share link: Outlier Detection">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Find</span><span class="w"> </span><span class="py">statistical</span><span class="w"> </span><span class="py">outliers</span><span class="w"> </span><span class="p">(</span><span class="py">values</span><span class="w"> </span><span class="py">beyond</span><span class="w"> </span><span class="py">3</span><span class="w"> </span><span class="py">standard</span><span class="w"> </span><span class="py">deviations</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="py">stats</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="p">(</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Product</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">RETURN</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AVG</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">price</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">mean_price</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">STDDEV</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">price</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">stddev_price</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Product</span><span class="p">),</span><span class="w"> </span><span class="p">(</span><span class="py">s</span><span class="p">:</span><span class="nc">stats</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">price</span><span class="w"> </span><span class="err"><</span><span class="w"> </span><span class="p">(</span><span class="py">s</span><span class="err">.</span><span class="py">mean_price</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="py">3</span><span class="w"> </span><span class="err">*</span><span class="w"> </span><span class="py">s</span><span class="err">.</span><span class="py">stddev_price</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">OR</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">price</span><span class="w"> </span><span class="err">></span><span class="w"> </span><span class="p">(</span><span class="py">s</span><span class="err">.</span><span class="py">mean_price</span><span class="w"> </span><span class="err">+</span><span class="w"> </span><span class="py">3</span><span class="w"> </span><span class="err">*</span><span class="w"> </span><span class="py">s</span><span class="err">.</span><span class="py">stddev_price</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">RETURN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">sku</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">name</span><span class="p">,</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">price</span><span class="p">,</span><span class="w"> </span><span class="py">s</span><span class="err">.</span><span class="py">mean_price</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">price</span><span class="w"> </span><span class="py">outlier</span><span class="err">'</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">issue</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h3 id="data-cleansing-strategies" class="position-relative d-flex align-items-center group">
<span>Data Cleansing Strategies</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="data-cleansing-strategies"
aria-haspopup="dialog"
aria-label="Share link: Data Cleansing Strategies">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3><p>Data cleansing removes or corrects invalid, duplicate, or inconsistent data.</p>
<h4 id="deduplication" class="position-relative d-flex align-items-center group">
<span>Deduplication</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="deduplication"
aria-haspopup="dialog"
aria-label="Share link: Deduplication">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Find</span><span class="w"> </span><span class="py">and</span><span class="w"> </span><span class="py">merge</span><span class="w"> </span><span class="py">duplicate</span><span class="w"> </span><span class="py">person</span><span class="w"> </span><span class="py">records</span><span class="w"> </span><span class="py">by</span><span class="w"> </span><span class="py">email</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p1</span><span class="p">:</span><span class="nc">Person</span><span class="p">),</span><span class="w"> </span><span class="p">(</span><span class="py">p2</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p1</span><span class="err">.</span><span class="py">email</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">p2</span><span class="err">.</span><span class="py">email</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">AND</span><span class="w"> </span><span class="py">p1</span><span class="err">.</span><span class="py">id</span><span class="w"> </span><span class="err"><</span><span class="w"> </span><span class="py">p2</span><span class="err">.</span><span class="py">id</span><span class="w"> </span><span class="err">--</span><span class="w"> </span><span class="py">Prevent</span><span class="w"> </span><span class="py">self</span><span class="err">-</span><span class="py">matches</span><span class="w"> </span><span class="py">and</span><span class="w"> </span><span class="py">duplicate</span><span class="w"> </span><span class="py">pairs</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="py">p1</span><span class="p">,</span><span class="w"> </span><span class="py">p2</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p2</span><span class="p">)</span><span class="err">-</span><span class="p">[</span><span class="py">r</span><span class="p">]</span><span class="err">-></span><span class="p">(</span><span class="py">other</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">CREATE</span><span class="w"> </span><span class="p">(</span><span class="py">p1</span><span class="p">)</span><span class="err">-</span><span class="p">[</span><span class="py">r2</span><span class="p">:</span><span class="nc">SAME_TYPE</span><span class="p">(</span><span class="py">r</span><span class="p">)]</span><span class="err">-></span><span class="p">(</span><span class="py">other</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">SET</span><span class="w"> </span><span class="py">r2</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">properties</span><span class="p">(</span><span class="py">r</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">DELETE</span><span class="w"> </span><span class="py">r</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="py">p1</span><span class="p">,</span><span class="w"> </span><span class="py">p2</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">DELETE</span><span class="w"> </span><span class="py">p2</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div><p>Python script for safe deduplication:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl"><span class="c1"># Python - deduplicate with manual review</span>
</span></span><span class="line"><span class="cl"><span class="k">async</span> <span class="k">def</span> <span class="nf">find_duplicates</span><span class="p">(</span><span class="n">client</span><span class="p">:</span> <span class="n">Client</span><span class="p">):</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"""Find potential duplicate persons based on email."""</span>
</span></span><span class="line"><span class="cl"> <span class="n">result</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="k">await</span> <span class="n">client</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="s2">"""
</span></span></span><span class="line"><span class="cl"><span class="s2"> MATCH (p:Person)
</span></span></span><span class="line"><span class="cl"><span class="s2"> WITH p.email AS email, COLLECT(p) AS persons
</span></span></span><span class="line"><span class="cl"><span class="s2"> WHERE SIZE(persons) > 1
</span></span></span><span class="line"><span class="cl"><span class="s2"> RETURN
</span></span></span><span class="line"><span class="cl"><span class="s2"> email,
</span></span></span><span class="line"><span class="cl"><span class="s2"> [person IN persons | {
</span></span></span><span class="line"><span class="cl"><span class="s2"> id: person.id,
</span></span></span><span class="line"><span class="cl"><span class="s2"> name: person.name,
</span></span></span><span class="line"><span class="cl"><span class="s2"> created_at: person.created_at
</span></span></span><span class="line"><span class="cl"><span class="s2"> }] AS duplicates
</span></span></span><span class="line"><span class="cl"><span class="s2"> """</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="k">return</span> <span class="n">result</span><span class="o">.</span><span class="n">bindings</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="k">async</span> <span class="k">def</span> <span class="nf">merge_persons</span><span class="p">(</span><span class="n">client</span><span class="p">:</span> <span class="n">Client</span><span class="p">,</span> <span class="n">keep_id</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">merge_id</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"""Merge two person records, keeping the first."""</span>
</span></span><span class="line"><span class="cl"> <span class="k">async</span> <span class="k">with</span> <span class="n">client</span><span class="o">.</span><span class="n">connection</span><span class="p">()</span> <span class="k">as</span> <span class="n">tx</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="k">await</span> <span class="n">tx</span><span class="o">.</span><span class="n">begin</span><span class="p">()</span>
</span></span><span class="line"><span class="cl"> <span class="c1"># Transfer relationships</span>
</span></span><span class="line"><span class="cl"> <span class="k">await</span> <span class="n">tx</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="s2">"""
</span></span></span><span class="line"><span class="cl"><span class="s2"> MATCH (keep:Person {id: $keep_id})
</span></span></span><span class="line"><span class="cl"><span class="s2"> MATCH (merge:Person {id: $merge_id})
</span></span></span><span class="line"><span class="cl"><span class="s2"> MATCH (merge)-[r]->(other)
</span></span></span><span class="line"><span class="cl"><span class="s2"> WHERE NOT EXISTS((keep)-[:SAME_TYPE(r)]->(other))
</span></span></span><span class="line"><span class="cl"><span class="s2"> CREATE (keep)-[r2:SAME_TYPE(r)]->(other)
</span></span></span><span class="line"><span class="cl"><span class="s2"> SET r2 = properties(r)
</span></span></span><span class="line"><span class="cl"><span class="s2"> DELETE r
</span></span></span><span class="line"><span class="cl"><span class="s2"> """</span><span class="p">,</span> <span class="p">{</span><span class="s2">"keep_id"</span><span class="p">:</span> <span class="n">keep_id</span><span class="p">,</span> <span class="s2">"merge_id"</span><span class="p">:</span> <span class="n">merge_id</span><span class="p">})</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="c1"># Delete duplicate</span>
</span></span><span class="line"><span class="cl"> <span class="k">await</span> <span class="n">tx</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"MATCH (p:Person {id: $merge_id}) DELETE p"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="p">{</span><span class="s2">"merge_id"</span><span class="p">:</span> <span class="n">merge_id</span><span class="p">}</span>
</span></span><span class="line"><span class="cl"> <span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="k">await</span> <span class="n">tx</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
</span></span></code></pre></div>
<h4 id="data-standardization" class="position-relative d-flex align-items-center group">
<span>Data Standardization</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="data-standardization"
aria-haspopup="dialog"
aria-label="Share link: Data Standardization">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Standardize</span><span class="w"> </span><span class="py">email</span><span class="w"> </span><span class="py">addresses</span><span class="w"> </span><span class="p">(</span><span class="py">lowercase</span><span class="p">,</span><span class="w"> </span><span class="py">trimmed</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="w"> </span><span class="py">IS</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">SET</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">LOWER</span><span class="p">(</span><span class="py">TRIM</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="p">))</span><span class="err">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Standardize</span><span class="w"> </span><span class="py">phone</span><span class="w"> </span><span class="py">numbers</span><span class="w"> </span><span class="p">(</span><span class="py">remove</span><span class="w"> </span><span class="py">formatting</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">phone</span><span class="w"> </span><span class="py">IS</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">SET</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">phone</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">REGEXP_REPLACE</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">phone</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="p">[</span><span class="err">^</span><span class="py">0</span><span class="err">-</span><span class="py">9</span><span class="err">+</span><span class="p">]</span><span class="err">'</span><span class="p">,</span><span class="w"> </span><span class="err">''</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">g</span><span class="err">'</span><span class="p">)</span><span class="err">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Standardize</span><span class="w"> </span><span class="py">country</span><span class="w"> </span><span class="py">codes</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">country</span><span class="w"> </span><span class="py">IS</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">  </span><span class="py">AND</span><span class="w"> </span><span class="py">LENGTH</span><span class="p">(</span><span class="py">TRIM</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">country</span><span class="p">))</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">2</span><span class="w"> </span><span class="err">--</span><span class="w"> </span><span class="py">ISO</span><span class="w"> </span><span class="py">3166</span><span class="err">-</span><span class="py">1</span><span class="w"> </span><span class="py">alpha</span><span class="err">-</span><span class="py">2</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">SET</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">country</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">UPPER</span><span class="p">(</span><span class="py">TRIM</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">country</span><span class="p">))</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h4 id="fixing-null-vs-empty-string" class="position-relative d-flex align-items-center group">
<span>Fixing NULL vs Empty String</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="fixing-null-vs-empty-string"
aria-haspopup="dialog"
aria-label="Share link: Fixing NULL vs Empty String">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Convert</span><span class="w"> </span><span class="py">empty</span><span class="w"> </span><span class="py">strings</span><span class="w"> </span><span class="py">to</span><span class="w"> </span><span class="py">NULL</span><span class="w"> </span><span class="py">for</span><span class="w"> </span><span class="py">consistency</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WHERE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">middle_name</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="err">''</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">OR</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">phone</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="err">''</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">OR</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">bio</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="err">''</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">SET</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">middle_name</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">CASE</span><span class="w"> </span><span class="py">WHEN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">middle_name</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="err">''</span><span class="w"> </span><span class="py">THEN</span><span class="w"> </span><span class="py">NULL</span><span class="w"> </span><span class="py">ELSE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">middle_name</span><span class="w"> </span><span class="py">END</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">phone</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">CASE</span><span class="w"> </span><span class="py">WHEN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">phone</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="err">''</span><span class="w"> </span><span class="py">THEN</span><span class="w"> </span><span class="py">NULL</span><span class="w"> </span><span class="py">ELSE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">phone</span><span class="w"> </span><span class="py">END</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">bio</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="py">CASE</span><span class="w"> </span><span class="py">WHEN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">bio</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="err">''</span><span class="w"> </span><span class="py">THEN</span><span class="w"> </span><span class="py">NULL</span><span class="w"> </span><span class="py">ELSE</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">bio</span><span class="w"> </span><span class="py">END</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h3 id="quality-monitoring-and-metrics" class="position-relative d-flex align-items-center group">
<span>Quality Monitoring and Metrics</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="quality-monitoring-and-metrics"
aria-haspopup="dialog"
aria-label="Share link: Quality Monitoring and Metrics">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3><p>Continuous monitoring ensures data quality doesn’t degrade over time.</p>
<h4 id="quality-dashboard" class="position-relative d-flex align-items-center group">
<span>Quality Dashboard</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="quality-dashboard"
aria-haspopup="dialog"
aria-label="Share link: Quality Dashboard">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Comprehensive</span><span class="w"> </span><span class="py">data</span><span class="w"> </span><span class="py">quality</span><span class="w"> </span><span class="py">metrics</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">WITH</span><span class="w"> </span><span class="py">person_metrics</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="p">(</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">MATCH</span><span class="w"> </span><span class="p">(</span><span class="py">p</span><span class="p">:</span><span class="nc">Person</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">RETURN</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">COUNT</span><span class="p">(</span><span class="err">*</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">total</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">COUNT</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">has_email</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">COUNT</span><span class="p">(</span><span class="py">p</span><span class="err">.</span><span class="py">phone</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">has_phone</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">SUM</span><span class="p">(</span><span class="py">CASE</span><span class="w"> </span><span class="py">WHEN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="w"> </span><span class="err"><</span><span class="w"> </span><span class="py">0</span><span class="w"> </span><span class="py">OR</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">age</span><span class="w"> </span><span class="err">></span><span class="w"> </span><span class="py">150</span><span class="w"> </span><span class="py">THEN</span><span class="w"> </span><span class="py">1</span><span class="w"> </span><span class="py">ELSE</span><span class="w"> </span><span class="py">0</span><span class="w"> </span><span class="py">END</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">invalid_age</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">SUM</span><span class="p">(</span><span class="py">CASE</span><span class="w"> </span><span class="py">WHEN</span><span class="w"> </span><span class="py">p</span><span class="err">.</span><span class="py">email</span><span class="w"> </span><span class="p">!</span><span class="err">~</span><span class="w"> </span><span class="err">'^</span><span class="p">[</span><span class="err">^@</span><span class="p">]</span><span class="err">+@</span><span class="p">[</span><span class="err">^@</span><span class="p">]</span><span class="err">+\.</span><span class="p">[</span><span class="err">^@</span><span class="p">]</span><span class="err">+$'</span><span class="w"> </span><span class="py">THEN</span><span class="w"> </span><span class="py">1</span><span class="w"> </span><span class="py">ELSE</span><span class="w"> </span><span class="py">0</span><span class="w"> </span><span class="py">END</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">invalid_email</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">SELECT</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="err">'</span><span class="py">Completeness</span><span class="p">:</span><span class="w"> </span><span class="nc">Email</span><span class="err">'</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">metric</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">ROUND</span><span class="p">(</span><span class="py">100</span><span class="mf">.0</span><span class="w"> </span><span class="err">*</span><span class="w"> </span><span class="py">has_email</span><span class="w"> </span><span class="err">/</span><span class="w"> </span><span class="py">total</span><span class="p">,</span><span class="w"> </span><span class="py">2</span><span class="p">)</span><span class="w"> </span><span class="py">AS</span><span class="w"> </span><span class="py">score</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">FROM</span><span class="w"> </span><span class="py">person_metrics</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="kc">UNION</span><span class="w"> </span><span class="py">ALL</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">SELECT</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="err">'</span><span class="py">Completeness</span><span class="p">:</span><span class="w"> </span><span class="nc">Phone</span><span class="err">'</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">ROUND</span><span class="p">(</span><span class="py">100</span><span class="mf">.0</span><span class="w"> </span><span class="err">*</span><span class="w"> </span><span class="py">has_phone</span><span class="w"> </span><span class="err">/</span><span class="w"> </span><span class="py">total</span><span class="p">,</span><span class="w"> </span><span class="py">2</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">FROM</span><span class="w"> </span><span class="py">person_metrics</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="kc">UNION</span><span class="w"> </span><span class="py">ALL</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">SELECT</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="err">'</span><span class="py">Validity</span><span class="p">:</span><span class="w"> </span><span class="nc">Age</span><span class="err">'</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">ROUND</span><span class="p">(</span><span class="py">100</span><span class="mf">.0</span><span class="w"> </span><span class="err">*</span><span class="w"> </span><span class="p">(</span><span class="py">total</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="py">invalid_age</span><span class="p">)</span><span class="w"> </span><span class="err">/</span><span class="w"> </span><span class="py">total</span><span class="p">,</span><span class="w"> </span><span class="py">2</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">FROM</span><span class="w"> </span><span class="py">person_metrics</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="kc">UNION</span><span class="w"> </span><span class="py">ALL</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">SELECT</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="err">'</span><span class="py">Validity</span><span class="p">:</span><span class="w"> </span><span class="nc">Email</span><span class="err">'</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">ROUND</span><span class="p">(</span><span class="py">100</span><span class="mf">.0</span><span class="w"> </span><span class="err">*</span><span class="w"> </span><span class="p">(</span><span class="py">has_email</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="py">invalid_email</span><span class="p">)</span><span class="w"> </span><span class="err">/</span><span class="w"> </span><span class="py">has_email</span><span class="p">,</span><span class="w"> </span><span class="py">2</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">FROM</span><span class="w"> </span><span class="py">person_metrics</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h4 id="automated-quality-checks" class="position-relative d-flex align-items-center group">
<span>Automated Quality Checks</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="automated-quality-checks"
aria-haspopup="dialog"
aria-label="Share link: Automated Quality Checks">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl"><span class="c1"># Python - automated quality monitoring</span>
</span></span><span class="line"><span class="cl"><span class="kn">from</span> <span class="nn">dataclasses</span> <span class="kn">import</span> <span class="n">dataclass</span>
</span></span><span class="line"><span class="cl"><span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">List</span>
</span></span><span class="line"><span class="cl"><span class="kn">import</span> <span class="nn">asyncio</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="nd">@dataclass</span>
</span></span><span class="line"><span class="cl"><span class="k">class</span> <span class="nc">QualityCheck</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span>
</span></span><span class="line"><span class="cl"> <span class="n">query</span><span class="p">:</span> <span class="nb">str</span>
</span></span><span class="line"><span class="cl"> <span class="n">threshold</span><span class="p">:</span> <span class="nb">float</span> <span class="c1"># Acceptable quality score (0-100)</span>
</span></span><span class="line"><span class="cl"> <span class="n">critical</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="k">class</span> <span class="nc">QualityMonitor</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">client</span><span class="p">:</span> <span class="n">Client</span><span class="p">):</span>
</span></span><span class="line"><span class="cl"> <span class="bp">self</span><span class="o">.</span><span class="n">client</span> <span class="o">=</span> <span class="n">client</span>
</span></span><span class="line"><span class="cl"> <span class="bp">self</span><span class="o">.</span><span class="n">checks</span> <span class="o">=</span> <span class="p">[</span>
</span></span><span class="line"><span class="cl"> <span class="n">QualityCheck</span><span class="p">(</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"Email Completeness"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"MATCH (p:Person) RETURN COUNT(p.email) * 100.0 / COUNT(*) AS score"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="n">threshold</span><span class="o">=</span><span class="mf">95.0</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="n">critical</span><span class="o">=</span><span class="kc">True</span>
</span></span><span class="line"><span class="cl"> <span class="p">),</span>
</span></span><span class="line"><span class="cl"> <span class="n">QualityCheck</span><span class="p">(</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"Valid Email Format"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"""MATCH (p:Person)
</span></span></span><span class="line"><span class="cl"><span class="s2"> WHERE p.email IS NOT NULL
</span></span></span><span class="line"><span class="cl"><span class="s2"> RETURN SUM(CASE WHEN p.email ~ '^[^@]+@[^@]+\\.[^@]+$' THEN 1 ELSE 0 END)
</span></span></span><span class="line"><span class="cl"><span class="s2"> * 100.0 / COUNT(*) AS score"""</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="n">threshold</span><span class="o">=</span><span class="mf">99.0</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="n">critical</span><span class="o">=</span><span class="kc">True</span>
</span></span><span class="line"><span class="cl"> <span class="p">),</span>
</span></span><span class="line"><span class="cl"> <span class="n">QualityCheck</span><span class="p">(</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"Age Validity"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"""MATCH (p:Person)
</span></span></span><span class="line"><span class="cl"><span class="s2"> WHERE p.age IS NOT NULL
</span></span></span><span class="line"><span class="cl"><span class="s2"> RETURN SUM(CASE WHEN p.age >= 0 AND p.age <= 150 THEN 1 ELSE 0 END)
</span></span></span><span class="line"><span class="cl"><span class="s2"> * 100.0 / COUNT(*) AS score"""</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="n">threshold</span><span class="o">=</span><span class="mf">99.5</span>
</span></span><span class="line"><span class="cl"> <span class="p">),</span>
</span></span><span class="line"><span class="cl"> <span class="p">]</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="k">async</span> <span class="k">def</span> <span class="nf">run_check</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">check</span><span class="p">:</span> <span class="n">QualityCheck</span><span class="p">)</span> <span class="o">-></span> <span class="nb">dict</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="n">result</span> <span class="o">=</span> <span class="k">await</span> <span class="bp">self</span><span class="o">.</span><span class="n">client</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">check</span><span class="o">.</span><span class="n">query</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="n">score</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">bindings</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="s1">'score'</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"> <span class="n">passed</span> <span class="o">=</span> <span class="n">score</span> <span class="o">>=</span> <span class="n">check</span><span class="o">.</span><span class="n">threshold</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="k">return</span> <span class="p">{</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"name"</span><span class="p">:</span> <span class="n">check</span><span class="o">.</span><span class="n">name</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"score"</span><span class="p">:</span> <span class="n">score</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"threshold"</span><span class="p">:</span> <span class="n">check</span><span class="o">.</span><span class="n">threshold</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"passed"</span><span class="p">:</span> <span class="n">passed</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"critical"</span><span class="p">:</span> <span class="n">check</span><span class="o">.</span><span class="n">critical</span>
</span></span><span class="line"><span class="cl"> <span class="p">}</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="k">async</span> <span class="k">def</span> <span class="nf">run_all_checks</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">dict</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="n">results</span> <span class="o">=</span> <span class="k">await</span> <span class="n">asyncio</span><span class="o">.</span><span class="n">gather</span><span class="p">(</span><span class="o">*</span><span class="p">[</span>
</span></span><span class="line"><span class="cl"> <span class="bp">self</span><span class="o">.</span><span class="n">run_check</span><span class="p">(</span><span class="n">check</span><span class="p">)</span> <span class="k">for</span> <span class="n">check</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">checks</span>
</span></span><span class="line"><span class="cl"> <span class="p">])</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="n">failed_critical</span> <span class="o">=</span> <span class="p">[</span><span class="n">r</span> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">results</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">r</span><span class="p">[</span><span class="s1">'passed'</span><span class="p">]</span> <span class="ow">and</span> <span class="n">r</span><span class="p">[</span><span class="s1">'critical'</span><span class="p">]]</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="k">return</span> <span class="p">{</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"timestamp"</span><span class="p">:</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span><span class="o">.</span><span class="n">isoformat</span><span class="p">(),</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"checks"</span><span class="p">:</span> <span class="n">results</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"overall_passed"</span><span class="p">:</span> <span class="nb">len</span><span class="p">(</span><span class="n">failed_critical</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"failed_critical_count"</span><span class="p">:</span> <span class="nb">len</span><span class="p">(</span><span class="n">failed_critical</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="p">}</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="c1"># Usage</span>
</span></span><span class="line"><span class="cl"><span class="k">async</span> <span class="k">def</span> <span class="nf">main</span><span class="p">():</span>
</span></span><span class="line"><span class="cl"> <span class="n">client</span> <span class="o">=</span> <span class="n">Client</span><span class="p">(</span><span class="n">host</span><span class="o">=</span><span class="s2">"localhost"</span><span class="p">,</span> <span class="n">port</span><span class="o">=</span><span class="mi">3141</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="k">async</span> <span class="k">with</span> <span class="n">client</span><span class="o">.</span><span class="n">connection</span><span class="p">()</span> <span class="k">as</span> <span class="n">conn</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="n">monitor</span> <span class="o">=</span> <span class="n">QualityMonitor</span><span class="p">(</span><span class="n">conn</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="n">report</span> <span class="o">=</span> <span class="k">await</span> <span class="n">monitor</span><span class="o">.</span><span class="n">run_all_checks</span><span class="p">()</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Quality Report - </span><span class="si">{</span><span class="n">report</span><span class="p">[</span><span class="s1">'timestamp'</span><span class="p">]</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Overall Status: </span><span class="si">{</span><span class="s1">'PASS'</span> <span class="k">if</span> <span class="n">report</span><span class="p">[</span><span class="s1">'overall_passed'</span><span class="p">]</span> <span class="k">else</span> <span class="s1">'FAIL'</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="k">for</span> <span class="n">check</span> <span class="ow">in</span> <span class="n">report</span><span class="p">[</span><span class="s1">'checks'</span><span class="p">]:</span>
</span></span><span class="line"><span class="cl"> <span class="n">status</span> <span class="o">=</span> <span class="s1">'✓'</span> <span class="k">if</span> <span class="n">check</span><span class="p">[</span><span class="s1">'passed'</span><span class="p">]</span> <span class="k">else</span> <span class="s1">'✗'</span>
</span></span><span class="line"><span class="cl"> <span class="n">critical</span> <span class="o">=</span> <span class="s1">' [CRITICAL]'</span> <span class="k">if</span> <span class="n">check</span><span class="p">[</span><span class="s1">'critical'</span><span class="p">]</span> <span class="k">else</span> <span class="s1">''</span>
</span></span><span class="line"><span class="cl"> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">status</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="n">check</span><span class="p">[</span><span class="s1">'name'</span><span class="p">]</span><span class="si">}</span><span class="s2">: </span><span class="si">{</span><span class="n">check</span><span class="p">[</span><span class="s1">'score'</span><span class="p">]</span><span class="si">:</span><span class="s2">.2f</span><span class="si">}</span><span class="s2">% "</span>
</span></span><span class="line"><span class="cl"> <span class="sa">f</span><span class="s2">"(threshold: </span><span class="si">{</span><span class="n">check</span><span class="p">[</span><span class="s1">'threshold'</span><span class="p">]</span><span class="si">}</span><span class="s2">%)</span><span class="si">{</span><span class="n">critical</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
</span></span></code></pre></div>
<h3 id="data-governance" class="position-relative d-flex align-items-center group">
<span>Data Governance</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="data-governance"
aria-haspopup="dialog"
aria-label="Share link: Data Governance">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3><p>Establish policies and procedures for maintaining data quality.</p>
<h4 id="quality-rules-documentation" class="position-relative d-flex align-items-center group">
<span>Quality Rules Documentation</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="quality-rules-documentation"
aria-haspopup="dialog"
aria-label="Share link: Quality Rules Documentation">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl"><span class="c1"># Define quality rules as code</span>
</span></span><span class="line"><span class="cl"><span class="n">QUALITY_RULES</span> <span class="o">=</span> <span class="p">{</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"person_email"</span><span class="p">:</span> <span class="p">{</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"description"</span><span class="p">:</span> <span class="s2">"Every person must have a valid email address"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"check"</span><span class="p">:</span> <span class="s2">"p.email IS NOT NULL AND p.email ~ '^[^@]+@[^@]+</span><span class="se">\\</span><span class="s2">.[^@]+$'"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"severity"</span><span class="p">:</span> <span class="s2">"critical"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"owner"</span><span class="p">:</span> <span class="s2">"data-quality@example.com"</span>
</span></span><span class="line"><span class="cl"> <span class="p">},</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"order_total"</span><span class="p">:</span> <span class="p">{</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"description"</span><span class="p">:</span> <span class="s2">"Order total must match sum of line items"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"check"</span><span class="p">:</span> <span class="s2">"o.total = (MATCH (o)-[:HAS_ITEM]->(i) RETURN SUM(i.price * i.quantity))"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"severity"</span><span class="p">:</span> <span class="s2">"high"</span><span class="p">,</span>
</span></span><span class="line"><span class="cl"> <span class="s2">"owner"</span><span class="p">:</span> <span class="s2">"finance-data@example.com"</span>
</span></span><span class="line"><span class="cl"> <span class="p">},</span>
</span></span><span class="line"><span class="cl"><span class="p">}</span>
</span></span></code></pre></div>
<h4 id="audit-trail" class="position-relative d-flex align-items-center group">
<span>Audit Trail</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="audit-trail"
aria-haspopup="dialog"
aria-label="Share link: Audit Trail">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h4><div class="highlight"><pre tabindex="0" class="chroma"><code class="language-gql" data-lang="gql"><span class="line"><span class="cl"><span class="err">--</span><span class="w"> </span><span class="py">Track</span><span class="w"> </span><span class="py">data</span><span class="w"> </span><span class="py">quality</span><span class="w"> </span><span class="py">issues</span><span class="w"> </span><span class="py">over</span><span class="w"> </span><span class="py">time</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">CREATE</span><span class="w"> </span><span class="py">NODE</span><span class="w"> </span><span class="py">TYPE</span><span class="w"> </span><span class="py">DataQualityIssue</span><span class="w"> </span><span class="p">(</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">id</span><span class="w"> </span><span class="py">STRING</span><span class="w"> </span><span class="py">DEFAULT</span><span class="w"> </span><span class="py">gen_random_uuid</span><span class="p">(),</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">issue_type</span><span class="w"> </span><span class="py">STRING</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">severity</span><span class="w"> </span><span class="py">STRING</span><span class="w"> </span><span class="py">CHECK</span><span class="w"> </span><span class="p">(</span><span class="py">severity</span><span class="w"> </span><span class="py">IN</span><span class="w"> </span><span class="p">(</span><span class="err">'</span><span class="py">low</span><span class="err">'</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">medium</span><span class="err">'</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">high</span><span class="err">'</span><span class="p">,</span><span class="w"> </span><span class="err">'</span><span class="py">critical</span><span class="err">'</span><span class="p">)),</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">detected_at</span><span class="w"> </span><span class="py">TIMESTAMP</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="w"> </span><span class="py">DEFAULT</span><span class="w"> </span><span class="py">NOW</span><span class="p">(),</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">resolved_at</span><span class="w"> </span><span class="py">TIMESTAMP</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">entity_type</span><span class="w"> </span><span class="py">STRING</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">entity_id</span><span class="w"> </span><span class="py">STRING</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">description</span><span class="w"> </span><span class="py">TEXT</span><span class="w"> </span><span class="py">NOT</span><span class="w"> </span><span class="py">NULL</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">resolution</span><span class="w"> </span><span class="py">TEXT</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="p">)</span><span class="err">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="err">--</span><span class="w"> </span><span class="py">Log</span><span class="w"> </span><span class="py">quality</span><span class="w"> </span><span class="py">issue</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="py">INSERT</span><span class="w"> </span><span class="p">(</span><span class="py">issue</span><span class="p">:</span><span class="nc">DataQualityIssue</span><span class="w"> </span><span class="p">{</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">issue_type</span><span class="p">:</span><span class="w"> </span><span class="err">'</span><span class="nc">invalid_email</span><span class="err">'</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">severity</span><span class="p">:</span><span class="w"> </span><span class="err">'</span><span class="nc">high</span><span class="err">'</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">entity_type</span><span class="p">:</span><span class="w"> </span><span class="err">'</span><span class="nc">Person</span><span class="err">'</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">entity_id</span><span class="p">:</span><span class="w"> </span><span class="err">'</span><span class="nc">person</span><span class="err">-</span><span class="py">123</span><span class="err">'</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="py">description</span><span class="p">:</span><span class="w"> </span><span class="err">'</span><span class="nc">Email</span><span class="w"> </span><span class="py">format</span><span class="w"> </span><span class="py">validation</span><span class="w"> </span><span class="py">failed</span><span class="err">'</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="p">})</span><span class="err">;</span><span class="w">
</span></span></span></code></pre></div>
<h3 id="best-practices" class="position-relative d-flex align-items-center group">
<span>Best Practices</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="best-practices"
aria-haspopup="dialog"
aria-label="Share link: Best Practices">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3><ol>
<li><strong>Define Quality Standards Early</strong>: Establish quality metrics during schema design</li>
<li><strong>Automate Quality Checks</strong>: Run validation on every data import and update</li>
<li><strong>Profile Regularly</strong>: Schedule weekly or daily data profiling jobs</li>
<li><strong>Track Metrics Over Time</strong>: Monitor quality trends to detect degradation</li>
<li><strong>Document Quality Rules</strong>: Maintain a catalog of what constitutes “quality”</li>
<li><strong>Assign Ownership</strong>: Each data domain should have a quality owner</li>
<li><strong>Prevent at Source</strong>: Validate data at input rather than fixing later</li>
<li><strong>Clean Incrementally</strong>: Don’t wait for major cleansing projects</li>
<li><strong>Version Quality Rules</strong>: Track changes to quality definitions</li>
<li><strong>Alert on Critical Issues</strong>: Notify stakeholders when quality drops below thresholds</li>
</ol>
<h3 id="common-quality-anti-patterns" class="position-relative d-flex align-items-center group">
<span>Common Quality Anti-Patterns</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="common-quality-anti-patterns"
aria-haspopup="dialog"
aria-label="Share link: Common Quality Anti-Patterns">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3><p><strong>Accepting “good enough” data</strong>: Quality degrades exponentially over time</p>
<p><strong>Manual cleansing</strong>: Doesn’t scale, introduces human error</p>
<p><strong>Ignoring outliers</strong>: Outliers often indicate deeper quality issues</p>
<p><strong>No monitoring</strong>: Quality issues go unnoticed until they become a crisis</p>
<p><strong>Fixing symptoms, not causes</strong>: Correct the data at its source, not just in the database</p>
<h3 id="related-topics" class="position-relative d-flex align-items-center group">
<span>Related Topics</span>
<button type="button"
class="h-share btn btn-link p-0 text-decoration-none link-secondary opacity-50 hover-opacity-100 transition-all ms-1"
data-share-target="related-topics"
aria-haspopup="dialog"
aria-label="Share link: Related Topics">
<i class="fa-sharp-duotone fa-solid fa-share-nodes" aria-hidden="true" style="font-size: 0.8em;"></i>
<span class="visually-hidden">Share link</span>
</button>
</h3><ul>
<li><a
href="/tags/validation"
>Validation</a>
- Data validation techniques</li>
<li><a
href="/tags/constraints"
>Constraints</a>
- Schema constraint enforcement</li>
<li><a
href="/tags/etl"
>ETL</a>
- Data integration quality</li>
<li><a
href="/tags/monitoring"
>Monitoring</a>
- System monitoring</li>
<li><a
href="/tags/governance/"
>Data Governance</a>
- Governance policies</li>
<li><a
href="/tags/testing"
>Testing</a>
- Quality testing strategies</li>
</ul>
Related Articles
No articles found with this tag yet.
Back to Home