- Complete Cloudflare Terraform configuration (DNS, WAF, tunnels, access) - WAF Intelligence MCP server with threat analysis and ML classification - GitOps automation with PR workflows and drift detection - Observatory monitoring stack with Prometheus/Grafana - IDE operator rules for governed development - Security playbooks and compliance frameworks - Autonomous remediation and state reconciliation
229 lines
8.5 KiB
YAML
229 lines
8.5 KiB
YAML
# DNS Alert Rules for Cloudflare Mesh Observatory
# Phase 5B - Alerts & Escalation

groups:
  # Single rule group holding every DNS integrity, change-tracking and
  # traffic alert. Severity banners above each rule mirror its `severity` label.
  - name: dns_alerts
    # Rules in this group are evaluated once per minute.
    interval: 60s
    rules:
      # ============================================
      # CRITICAL - DNS Hijack Detected
      # ============================================
      # Fires when the mismatch flag has been 1 for a full minute.
      - alert: DNSHijackDetected
        expr: cloudflare_dns_record_mismatch == 1
        for: 1m
        labels:
          severity: critical
          component: dns
          playbook: dns-compromise
          # Quoted on purpose: label values are strings, not YAML booleans.
          security_incident: "true"
        annotations:
          summary: "POTENTIAL DNS HIJACK: {{ $labels.record_name }}"
          description: |
            DNS record {{ $labels.record_name }} ({{ $labels.record_type }}) in zone
            {{ $labels.zone }} does not match expected value.

            Expected: {{ $labels.expected_value }}
            Actual: {{ $labels.actual_value }}

            This may indicate DNS hijacking or unauthorized modification.
            TREAT AS SECURITY INCIDENT until verified.
          impact: "Traffic may be routed to unauthorized destinations"
          runbook_url: "https://wiki.internal/playbooks/dns-compromise"

      # ============================================
      # CRITICAL - Critical DNS Record Missing
      # ============================================
      # Fires when the existence flag has been 0 for two minutes.
      # NOTE(review): a series that disappears entirely will not match
      # `== 0`; confirm the exporter always emits the metric.
      - alert: CriticalDNSRecordMissing
        expr: cloudflare_dns_critical_record_exists == 0
        for: 2m
        labels:
          severity: critical
          component: dns
          playbook: dns-compromise
        annotations:
          summary: "Critical DNS record missing: {{ $labels.record_name }}"
          description: |
            Critical DNS record {{ $labels.record_name }} ({{ $labels.record_type }})
            is missing from zone {{ $labels.zone }}.
            This record is marked as critical in the DNS manifest.
          impact: "Service reachability may be affected"
          runbook_url: "https://wiki.internal/playbooks/dns-compromise"

      # ============================================
      # WARNING - DNS Drift Detected
      # ============================================
      # $value in the description is the drift count itself.
      - alert: DNSDriftDetected
        expr: cloudflare_dns_drift_count > 0
        for: 5m
        labels:
          severity: warning
          component: dns
        annotations:
          summary: "DNS drift detected in zone {{ $labels.zone }}"
          description: |
            {{ $value }} DNS records in zone {{ $labels.zone }} differ from
            the expected baseline configuration.

            Run state reconciler to identify specific changes.
          runbook_url: "https://wiki.internal/playbooks/dns-compromise"

      # ============================================
      # WARNING - DNS Record TTL Mismatch
      # ============================================
      - alert: DNSTTLMismatch
        expr: cloudflare_dns_ttl_mismatch == 1
        for: 10m
        labels:
          severity: warning
          component: dns
        annotations:
          summary: "DNS TTL mismatch: {{ $labels.record_name }}"
          description: |
            DNS record {{ $labels.record_name }} has unexpected TTL.
            Expected: {{ $labels.expected_ttl }}s
            Actual: {{ $labels.actual_ttl }}s

            This may affect caching behavior and failover timing.

      # ============================================
      # WARNING - DNS Propagation Slow
      # ============================================
      # Threshold is 300 s (5 minutes). This rule has no upper bound, so it
      # keeps firing alongside DNSPropagationFailed once 900 s is exceeded.
      # NOTE(review): presumably de-duplicated by an Alertmanager inhibit
      # rule; confirm.
      - alert: DNSPropagationSlow
        expr: cloudflare_dns_propagation_time_seconds > 300
        for: 5m
        labels:
          severity: warning
          component: dns
        annotations:
          summary: "Slow DNS propagation for {{ $labels.record_name }}"
          description: |
            DNS changes for {{ $labels.record_name }} are taking longer than
            5 minutes to propagate.
            Current propagation time: {{ $value | humanizeDuration }}

      # ============================================
      # CRITICAL - DNS Propagation Failed
      # ============================================
      # Threshold is 900 s (15 minutes).
      - alert: DNSPropagationFailed
        expr: cloudflare_dns_propagation_time_seconds > 900
        for: 5m
        labels:
          severity: critical
          component: dns
        annotations:
          summary: "DNS propagation failed for {{ $labels.record_name }}"
          description: |
            DNS changes for {{ $labels.record_name }} have not propagated
            after 15 minutes. This may indicate a configuration issue.

      # ============================================
      # WARNING - Unexpected DNS Record
      # ============================================
      - alert: UnexpectedDNSRecord
        expr: cloudflare_dns_unexpected_record == 1
        for: 5m
        labels:
          severity: warning
          component: dns
        annotations:
          summary: "Unexpected DNS record: {{ $labels.record_name }}"
          description: |
            DNS record {{ $labels.record_name }} ({{ $labels.record_type }}) exists
            but is not defined in the DNS manifest.
            This may be an unauthorized addition.

      # ============================================
      # INFO - DNS Record Added
      # ============================================
      # Compares the current record count with the count one hour ago, so
      # $value is the net number of records added. The metric is used as a
      # gauge (DNSRecordRemoved expects it to go down), so increase() is
      # avoided: it would read any drop as a counter reset and report a
      # spurious addition.
      # NOTE(review): this is a net change; an add and a remove within the
      # same hour cancel out.
      - alert: DNSRecordAdded
        expr: |
          cloudflare_dns_records_total
            - cloudflare_dns_records_total offset 1h > 0
        for: 0m
        labels:
          severity: info
          component: dns
        annotations:
          summary: "DNS record added in zone {{ $labels.zone }}"
          description: |
            {{ $value }} new DNS record(s) detected in zone {{ $labels.zone }}
            in the last hour. Verify this was authorized.

      # ============================================
      # INFO - DNS Record Removed
      # ============================================
      # PromQL has no decrease() function. Subtract the current count from
      # the count one hour ago so $value is the positive net number of
      # records removed.
      - alert: DNSRecordRemoved
        expr: |
          cloudflare_dns_records_total offset 1h
            - cloudflare_dns_records_total > 0
        for: 0m
        labels:
          severity: info
          component: dns
        annotations:
          summary: "DNS record removed from zone {{ $labels.zone }}"
          description: |
            {{ $value }} DNS record(s) removed from zone {{ $labels.zone }}
            in the last hour. Verify this was authorized.

      # ============================================
      # WARNING - DNSSEC Disabled
      # ============================================
      - alert: DNSSECDisabled
        expr: cloudflare_zone_dnssec_enabled == 0
        for: 5m
        labels:
          severity: warning
          component: dns
        annotations:
          summary: "DNSSEC disabled for zone {{ $labels.zone }}"
          description: |
            DNSSEC is not enabled for zone {{ $labels.zone }}.
            This reduces protection against DNS spoofing attacks.

      # ============================================
      # WARNING - Zone Transfer Enabled
      # ============================================
      - alert: ZoneTransferEnabled
        expr: cloudflare_zone_axfr_enabled == 1
        for: 5m
        labels:
          severity: warning
          component: dns
        annotations:
          summary: "Zone transfer (AXFR) enabled for {{ $labels.zone }}"
          description: |
            Zone transfer is enabled for {{ $labels.zone }}.
            This exposes DNS records to potential enumeration.
            Disable unless explicitly required.

      # ============================================
      # WARNING - DNS Query Spike
      # ============================================
      # Compares the current 5m query rate with three times the average of
      # that same rate over the past 24h, sampled every 5m via a subquery.
      - alert: DNSQuerySpike
        expr: |
          rate(cloudflare_dns_queries_total[5m])
          > 3 * avg_over_time(rate(cloudflare_dns_queries_total[5m])[24h:5m])
        for: 5m
        labels:
          severity: warning
          component: dns
        annotations:
          summary: "DNS query spike for zone {{ $labels.zone }}"
          description: |
            DNS queries for zone {{ $labels.zone }} are 3x above the 24-hour average.
            This may indicate a DDoS attack or misconfigured client.

      # ============================================
      # WARNING - High DNS Error Rate
      # ============================================
      # Ratio of error rate to query rate; 0.01 is the 1% threshold.
      # NOTE(review): the division matches series on identical label sets;
      # confirm both metrics carry the same labels.
      - alert: HighDNSErrorRate
        expr: |
          rate(cloudflare_dns_errors_total[5m])
          / rate(cloudflare_dns_queries_total[5m]) > 0.01
        for: 10m
        labels:
          severity: warning
          component: dns
        annotations:
          summary: "High DNS error rate for zone {{ $labels.zone }}"
          description: |
            DNS error rate exceeds 1% for zone {{ $labels.zone }}.
            Current error rate: {{ $value | humanizePercentage }}