Initial commit: Cloudflare infrastructure with WAF Intelligence
- Complete Cloudflare Terraform configuration (DNS, WAF, tunnels, access) - WAF Intelligence MCP server with threat analysis and ML classification - GitOps automation with PR workflows and drift detection - Observatory monitoring stack with Prometheus/Grafana - IDE operator rules for governed development - Security playbooks and compliance frameworks - Autonomous remediation and state reconciliation
This commit is contained in:
228
observatory/prometheus/alerts/dns-alerts.yml
Normal file
228
observatory/prometheus/alerts/dns-alerts.yml
Normal file
@@ -0,0 +1,228 @@
|
||||
# DNS Alert Rules for Cloudflare Mesh Observatory
|
||||
# Phase 5B - Alerts & Escalation
|
||||
|
||||
groups:
|
||||
- name: dns_alerts
|
||||
interval: 60s
|
||||
rules:
|
||||
# ============================================
|
||||
# CRITICAL - DNS Hijack Detection
|
||||
# ============================================
|
||||
- alert: DNSHijackDetected
|
||||
expr: cloudflare_dns_record_mismatch == 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
component: dns
|
||||
playbook: dns-compromise
|
||||
security_incident: "true"
|
||||
annotations:
|
||||
summary: "POTENTIAL DNS HIJACK: {{ $labels.record_name }}"
|
||||
description: |
|
||||
DNS record {{ $labels.record_name }} ({{ $labels.record_type }}) in zone
|
||||
{{ $labels.zone }} does not match expected value.
|
||||
|
||||
Expected: {{ $labels.expected_value }}
|
||||
Actual: {{ $labels.actual_value }}
|
||||
|
||||
This may indicate DNS hijacking or unauthorized modification.
|
||||
TREAT AS SECURITY INCIDENT until verified.
|
||||
impact: "Traffic may be routed to unauthorized destinations"
|
||||
runbook_url: "https://wiki.internal/playbooks/dns-compromise"
|
||||
|
||||
# ============================================
|
||||
# CRITICAL - Critical DNS Record Missing
|
||||
# ============================================
|
||||
- alert: CriticalDNSRecordMissing
|
||||
expr: cloudflare_dns_critical_record_exists == 0
|
||||
for: 2m
|
||||
labels:
|
||||
severity: critical
|
||||
component: dns
|
||||
playbook: dns-compromise
|
||||
annotations:
|
||||
summary: "Critical DNS record missing: {{ $labels.record_name }}"
|
||||
description: |
|
||||
Critical DNS record {{ $labels.record_name }} ({{ $labels.record_type }})
|
||||
is missing from zone {{ $labels.zone }}.
|
||||
This record is marked as critical in the DNS manifest.
|
||||
impact: "Service reachability may be affected"
|
||||
runbook_url: "https://wiki.internal/playbooks/dns-compromise"
|
||||
|
||||
# ============================================
|
||||
# WARNING - DNS Drift Detected
|
||||
# ============================================
|
||||
- alert: DNSDriftDetected
|
||||
expr: cloudflare_dns_drift_count > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
component: dns
|
||||
annotations:
|
||||
summary: "DNS drift detected in zone {{ $labels.zone }}"
|
||||
description: |
|
||||
{{ $value }} DNS records in zone {{ $labels.zone }} differ from
|
||||
the expected baseline configuration.
|
||||
|
||||
Run state reconciler to identify specific changes.
|
||||
runbook_url: "https://wiki.internal/playbooks/dns-compromise"
|
||||
|
||||
# ============================================
|
||||
# WARNING - DNS Record TTL Mismatch
|
||||
# ============================================
|
||||
- alert: DNSTTLMismatch
|
||||
expr: cloudflare_dns_ttl_mismatch == 1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
component: dns
|
||||
annotations:
|
||||
summary: "DNS TTL mismatch: {{ $labels.record_name }}"
|
||||
description: |
|
||||
DNS record {{ $labels.record_name }} has unexpected TTL.
|
||||
Expected: {{ $labels.expected_ttl }}s
|
||||
Actual: {{ $labels.actual_ttl }}s
|
||||
|
||||
This may affect caching behavior and failover timing.
|
||||
|
||||
# ============================================
|
||||
# WARNING - DNS Propagation Slow
|
||||
# ============================================
|
||||
- alert: DNSPropagationSlow
|
||||
expr: cloudflare_dns_propagation_time_seconds > 300
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
component: dns
|
||||
annotations:
|
||||
summary: "Slow DNS propagation for {{ $labels.record_name }}"
|
||||
description: |
|
||||
DNS changes for {{ $labels.record_name }} are taking longer than
|
||||
5 minutes to propagate.
|
||||
Current propagation time: {{ $value | humanizeDuration }}
|
||||
|
||||
# ============================================
|
||||
# CRITICAL - DNS Propagation Failed
|
||||
# ============================================
|
||||
- alert: DNSPropagationFailed
|
||||
expr: cloudflare_dns_propagation_time_seconds > 900
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
component: dns
|
||||
annotations:
|
||||
summary: "DNS propagation failed for {{ $labels.record_name }}"
|
||||
description: |
|
||||
DNS changes for {{ $labels.record_name }} have not propagated
|
||||
after 15 minutes. This may indicate a configuration issue.
|
||||
|
||||
# ============================================
|
||||
# WARNING - Unexpected DNS Record
|
||||
# ============================================
|
||||
- alert: UnexpectedDNSRecord
|
||||
expr: cloudflare_dns_unexpected_record == 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
component: dns
|
||||
annotations:
|
||||
summary: "Unexpected DNS record: {{ $labels.record_name }}"
|
||||
description: |
|
||||
DNS record {{ $labels.record_name }} ({{ $labels.record_type }}) exists
|
||||
but is not defined in the DNS manifest.
|
||||
This may be an unauthorized addition.
|
||||
|
||||
# ============================================
|
||||
# INFO - DNS Record Added
|
||||
# ============================================
|
||||
# Fires as soon as the number of DNS records in a zone has grown within the
# last hour (for: 0m, so there is no confirmation delay).
# delta() is used instead of increase(): increase() is meant for counters and
# treats any drop in the value as a counter reset, so a record removal would
# be reported here as an addition.
# NOTE(review): assumes cloudflare_dns_records_total is a gauge holding the
# current record count (the DNSRecordRemoved rule in this group expects it to
# be able to go down) - confirm against the exporter.
- alert: DNSRecordAdded
  expr: delta(cloudflare_dns_records_total[1h]) > 0
  for: 0m
  labels:
    severity: info
    component: dns
  annotations:
    summary: "DNS record added in zone {{ $labels.zone }}"
    description: |
      {{ $value }} new DNS record(s) detected in zone {{ $labels.zone }}
      in the last hour. Verify this was authorized.
|
||||
|
||||
# ============================================
|
||||
# INFO - DNS Record Removed
|
||||
# ============================================
|
||||
# Fires as soon as the number of DNS records in a zone has dropped within the
# last hour (for: 0m, so there is no confirmation delay).
# PromQL has no decrease() function; delta() returns the signed change of a
# gauge over the window, so a drop shows up as a negative delta.
# NOTE(review): assumes cloudflare_dns_records_total is a gauge holding the
# current record count - confirm against the exporter.
- alert: DNSRecordRemoved
  # Negated so that $value is the positive number of records removed.
  expr: -delta(cloudflare_dns_records_total[1h]) > 0
  for: 0m
  labels:
    severity: info
    component: dns
  annotations:
    summary: "DNS record removed from zone {{ $labels.zone }}"
    description: |
      {{ $value }} DNS record(s) removed from zone {{ $labels.zone }}
      in the last hour. Verify this was authorized.
|
||||
|
||||
# ============================================
|
||||
# WARNING - DNSSEC Disabled
|
||||
# ============================================
|
||||
- alert: DNSSECDisabled
|
||||
expr: cloudflare_zone_dnssec_enabled == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
component: dns
|
||||
annotations:
|
||||
summary: "DNSSEC disabled for zone {{ $labels.zone }}"
|
||||
description: |
|
||||
DNSSEC is not enabled for zone {{ $labels.zone }}.
|
||||
This reduces protection against DNS spoofing attacks.
|
||||
|
||||
# ============================================
|
||||
# WARNING - Zone Transfer Enabled
|
||||
# ============================================
|
||||
- alert: ZoneTransferEnabled
|
||||
expr: cloudflare_zone_axfr_enabled == 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
component: dns
|
||||
annotations:
|
||||
summary: "Zone transfer (AXFR) enabled for {{ $labels.zone }}"
|
||||
description: |
|
||||
Zone transfer is enabled for {{ $labels.zone }}.
|
||||
This exposes DNS records to potential enumeration.
|
||||
Disable unless explicitly required.
|
||||
|
||||
# ============================================
|
||||
# WARNING - DNS Query Spike
|
||||
# ============================================
|
||||
- alert: DNSQuerySpike
|
||||
expr: |
|
||||
rate(cloudflare_dns_queries_total[5m])
|
||||
> 3 * avg_over_time(rate(cloudflare_dns_queries_total[5m])[24h:5m])
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
component: dns
|
||||
annotations:
|
||||
summary: "DNS query spike for zone {{ $labels.zone }}"
|
||||
description: |
|
||||
DNS queries for zone {{ $labels.zone }} are 3x above the 24-hour average.
|
||||
This may indicate a DDoS attack or misconfigured client.
|
||||
|
||||
# ============================================
|
||||
# WARNING - High DNS Error Rate
|
||||
# ============================================
|
||||
- alert: HighDNSErrorRate
|
||||
expr: |
|
||||
rate(cloudflare_dns_errors_total[5m])
|
||||
/ rate(cloudflare_dns_queries_total[5m]) > 0.01
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
component: dns
|
||||
annotations:
|
||||
summary: "High DNS error rate for zone {{ $labels.zone }}"
|
||||
description: |
|
||||
DNS error rate exceeds 1% for zone {{ $labels.zone }}.
|
||||
Current error rate: {{ $value | humanizePercentage }}
|
||||
284
observatory/prometheus/alerts/invariant-alerts.yml
Normal file
284
observatory/prometheus/alerts/invariant-alerts.yml
Normal file
@@ -0,0 +1,284 @@
|
||||
# Security Invariant Alert Rules for Cloudflare Mesh Observatory
|
||||
# Phase 5B - Alerts & Escalation
|
||||
|
||||
groups:
|
||||
- name: invariant_alerts
|
||||
interval: 60s
|
||||
rules:
|
||||
# ============================================
|
||||
# CRITICAL - SSL Mode Downgrade
|
||||
# ============================================
|
||||
- alert: SSLModeDowngraded
|
||||
expr: cloudflare_zone_ssl_mode != 1 # 1 = Full (Strict)
|
||||
for: 2m
|
||||
labels:
|
||||
severity: critical
|
||||
component: invariant
|
||||
invariant_name: ssl_strict_mode
|
||||
category: encryption
|
||||
frameworks: "SOC2,PCI-DSS,ISO27001"
|
||||
annotations:
|
||||
summary: "SSL mode is not Full (Strict) for {{ $labels.zone }}"
|
||||
description: |
|
||||
Zone {{ $labels.zone }} SSL mode has been changed from Full (Strict).
|
||||
Current mode: {{ $labels.ssl_mode }}
|
||||
|
||||
This weakens TLS security and may allow MITM attacks.
|
||||
This is a compliance violation for multiple frameworks.
|
||||
impact: "Reduced TLS security, potential MITM vulnerability"
|
||||
runbook_url: "https://wiki.internal/invariants/ssl-mode"
|
||||
|
||||
# ============================================
|
||||
# CRITICAL - Always Use HTTPS Disabled
|
||||
# ============================================
|
||||
- alert: HTTPSNotEnforced
|
||||
expr: cloudflare_zone_always_use_https == 0
|
||||
for: 2m
|
||||
labels:
|
||||
severity: critical
|
||||
component: invariant
|
||||
invariant_name: always_use_https
|
||||
category: encryption
|
||||
frameworks: "SOC2,PCI-DSS,HIPAA"
|
||||
annotations:
|
||||
summary: "Always Use HTTPS disabled for {{ $labels.zone }}"
|
||||
description: |
|
||||
Zone {{ $labels.zone }} allows HTTP traffic.
|
||||
This may expose sensitive data in transit.
|
||||
impact: "Data transmitted over unencrypted connections"
|
||||
runbook_url: "https://wiki.internal/invariants/https-enforcement"
|
||||
|
||||
# ============================================
|
||||
# CRITICAL - TLS Version Below Minimum
|
||||
# ============================================
|
||||
- alert: TLSVersionTooLow
|
||||
expr: cloudflare_zone_min_tls_version < 1.2
|
||||
for: 2m
|
||||
labels:
|
||||
severity: critical
|
||||
component: invariant
|
||||
invariant_name: min_tls_version
|
||||
category: encryption
|
||||
frameworks: "PCI-DSS,NIST"
|
||||
annotations:
|
||||
summary: "Minimum TLS version below 1.2 for {{ $labels.zone }}"
|
||||
description: |
|
||||
Zone {{ $labels.zone }} allows TLS versions below 1.2.
|
||||
Current minimum: TLS {{ $labels.min_tls }}
|
||||
|
||||
TLS 1.0 and 1.1 have known vulnerabilities.
|
||||
PCI-DSS requires TLS 1.2 minimum.
|
||||
impact: "Vulnerable TLS versions allowed"
|
||||
runbook_url: "https://wiki.internal/invariants/tls-version"
|
||||
|
||||
# ============================================
|
||||
# WARNING - HSTS Not Enabled
|
||||
# ============================================
|
||||
- alert: HSTSNotEnabled
|
||||
expr: cloudflare_zone_hsts_enabled == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
component: invariant
|
||||
invariant_name: hsts_enabled
|
||||
category: encryption
|
||||
frameworks: "SOC2,OWASP"
|
||||
annotations:
|
||||
summary: "HSTS not enabled for {{ $labels.zone }}"
|
||||
description: |
|
||||
HTTP Strict Transport Security is not enabled for {{ $labels.zone }}.
|
||||
This allows SSL stripping attacks.
|
||||
runbook_url: "https://wiki.internal/invariants/hsts"
|
||||
|
||||
# ============================================
|
||||
# WARNING - Security Headers Missing
|
||||
# ============================================
|
||||
- alert: SecurityHeadersMissing
|
||||
expr: cloudflare_zone_security_headers_score < 0.8
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
component: invariant
|
||||
invariant_name: security_headers
|
||||
category: headers
|
||||
frameworks: "OWASP,SOC2"
|
||||
annotations:
|
||||
summary: "Security headers score below threshold for {{ $labels.zone }}"
|
||||
description: |
|
||||
Zone {{ $labels.zone }} security headers score: {{ $value }}
|
||||
Expected minimum: 0.8
|
||||
|
||||
Missing headers may include: CSP, X-Frame-Options, X-Content-Type-Options
|
||||
runbook_url: "https://wiki.internal/invariants/security-headers"
|
||||
|
||||
# ============================================
|
||||
# CRITICAL - Origin IP Exposed
|
||||
# ============================================
|
||||
- alert: OriginIPExposed
|
||||
expr: cloudflare_origin_ip_exposed == 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
component: invariant
|
||||
invariant_name: origin_hidden
|
||||
category: network
|
||||
frameworks: "SOC2"
|
||||
annotations:
|
||||
summary: "Origin IP may be exposed for {{ $labels.zone }}"
|
||||
description: |
|
||||
DNS or headers may be exposing the origin server IP.
|
||||
Exposed via: {{ $labels.exposure_method }}
|
||||
|
||||
Attackers can bypass Cloudflare protection by attacking origin directly.
|
||||
impact: "Origin server exposed to direct attacks"
|
||||
runbook_url: "https://wiki.internal/invariants/origin-protection"
|
||||
|
||||
# ============================================
|
||||
# WARNING - Rate Limiting Not Configured
|
||||
# ============================================
|
||||
- alert: RateLimitingMissing
|
||||
expr: cloudflare_zone_rate_limiting_rules == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
component: invariant
|
||||
invariant_name: rate_limiting
|
||||
category: protection
|
||||
frameworks: "SOC2,OWASP"
|
||||
annotations:
|
||||
summary: "No rate limiting rules for {{ $labels.zone }}"
|
||||
description: |
|
||||
Zone {{ $labels.zone }} has no rate limiting rules configured.
|
||||
This leaves the zone vulnerable to brute force attacks.
|
||||
runbook_url: "https://wiki.internal/invariants/rate-limiting"
|
||||
|
||||
# ============================================
|
||||
# WARNING - Authenticated Origin Pulls Disabled
|
||||
# ============================================
|
||||
- alert: AuthenticatedOriginPullsDisabled
|
||||
expr: cloudflare_zone_authenticated_origin_pulls == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
component: invariant
|
||||
invariant_name: aop_enabled
|
||||
category: authentication
|
||||
frameworks: "SOC2,Zero-Trust"
|
||||
annotations:
|
||||
summary: "Authenticated Origin Pulls disabled for {{ $labels.zone }}"
|
||||
description: |
|
||||
Authenticated Origin Pulls is not enabled for {{ $labels.zone }}.
|
||||
Origin cannot verify requests come from Cloudflare.
|
||||
runbook_url: "https://wiki.internal/invariants/authenticated-origin-pulls"
|
||||
|
||||
# ============================================
|
||||
# WARNING - Bot Protection Disabled
|
||||
# ============================================
|
||||
- alert: BotProtectionDisabled
|
||||
expr: cloudflare_zone_bot_management_enabled == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
component: invariant
|
||||
invariant_name: bot_management
|
||||
category: protection
|
||||
annotations:
|
||||
summary: "Bot management disabled for {{ $labels.zone }}"
|
||||
description: |
|
||||
Bot management is not enabled for {{ $labels.zone }}.
|
||||
Zone is vulnerable to automated attacks and scraping.
|
||||
runbook_url: "https://wiki.internal/invariants/bot-management"
|
||||
|
||||
# ============================================
|
||||
# CRITICAL - Access Policy Violation
|
||||
# ============================================
|
||||
- alert: AccessPolicyViolation
|
||||
expr: cloudflare_access_policy_violations > 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
component: invariant
|
||||
invariant_name: access_policy
|
||||
category: access_control
|
||||
frameworks: "SOC2,Zero-Trust,ISO27001"
|
||||
annotations:
|
||||
summary: "Access policy violations detected"
|
||||
description: |
|
||||
{{ $value }} access policy violations detected.
|
||||
Policy: {{ $labels.policy_name }}
|
||||
|
||||
Review access logs for unauthorized access attempts.
|
||||
impact: "Potential unauthorized access"
|
||||
runbook_url: "https://wiki.internal/invariants/access-control"
|
||||
|
||||
# ============================================
|
||||
# WARNING - Browser Integrity Check Disabled
|
||||
# ============================================
|
||||
- alert: BrowserIntegrityCheckDisabled
|
||||
expr: cloudflare_zone_browser_integrity_check == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
component: invariant
|
||||
invariant_name: browser_integrity_check
|
||||
category: protection
|
||||
annotations:
|
||||
summary: "Browser Integrity Check disabled for {{ $labels.zone }}"
|
||||
description: |
|
||||
Browser Integrity Check is disabled for {{ $labels.zone }}.
|
||||
This allows requests with suspicious headers.
|
||||
|
||||
# ============================================
|
||||
# INFO - Email Obfuscation Disabled
|
||||
# ============================================
|
||||
- alert: EmailObfuscationDisabled
|
||||
expr: cloudflare_zone_email_obfuscation == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: info
|
||||
component: invariant
|
||||
invariant_name: email_obfuscation
|
||||
category: privacy
|
||||
annotations:
|
||||
summary: "Email obfuscation disabled for {{ $labels.zone }}"
|
||||
description: |
|
||||
Email obfuscation is disabled. Email addresses on pages
|
||||
may be harvested by spam bots.
|
||||
|
||||
# ============================================
|
||||
# WARNING - Development Mode Active
|
||||
# ============================================
|
||||
- alert: DevelopmentModeActive
|
||||
expr: cloudflare_zone_development_mode == 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
component: invariant
|
||||
invariant_name: development_mode
|
||||
category: configuration
|
||||
annotations:
|
||||
summary: "Development mode active for {{ $labels.zone }}"
|
||||
description: |
|
||||
Development mode is active for {{ $labels.zone }}.
|
||||
This bypasses Cloudflare's cache and should only be used temporarily.
|
||||
Remember to disable after development is complete.
|
||||
|
||||
# ============================================
|
||||
# CRITICAL - Invariant Check Failure
|
||||
# ============================================
|
||||
- alert: InvariantCheckFailed
|
||||
expr: cloudflare_invariant_check_status == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
component: invariant
|
||||
category: monitoring
|
||||
annotations:
|
||||
summary: "Invariant checker is failing"
|
||||
description: |
|
||||
The invariant checker script is not running successfully.
|
||||
Last success: {{ $labels.last_success }}
|
||||
Error: {{ $labels.error_message }}
|
||||
|
||||
Security invariants are not being monitored.
|
||||
runbook_url: "https://wiki.internal/invariants/checker-troubleshooting"
|
||||
257
observatory/prometheus/alerts/proofchain-alerts.yml
Normal file
257
observatory/prometheus/alerts/proofchain-alerts.yml
Normal file
@@ -0,0 +1,257 @@
|
||||
# Proofchain Alert Rules for Cloudflare Mesh Observatory
|
||||
# Phase 5B - Alerts & Escalation
|
||||
|
||||
groups:
|
||||
- name: proofchain_alerts
|
||||
interval: 60s
|
||||
rules:
|
||||
# ============================================
|
||||
# CRITICAL - Chain Integrity Failure
|
||||
# ============================================
|
||||
- alert: ProofchainIntegrityFailure
|
||||
expr: cloudflare_proofchain_integrity_valid == 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
component: proofchain
|
||||
security_incident: "true"
|
||||
annotations:
|
||||
summary: "CRITICAL: Proofchain integrity verification FAILED"
|
||||
description: |
|
||||
Proofchain {{ $labels.chain_name }} has failed integrity verification.
|
||||
|
||||
Last valid hash: {{ $labels.last_valid_hash }}
|
||||
Expected hash: {{ $labels.expected_hash }}
|
||||
Computed hash: {{ $labels.computed_hash }}
|
||||
|
||||
This indicates potential:
|
||||
- Ledger tampering
|
||||
- Receipt corruption
|
||||
- Chain fork
|
||||
|
||||
IMMEDIATELY HALT new receipt generation until resolved.
|
||||
impact: "Audit trail integrity compromised"
|
||||
runbook_url: "https://wiki.internal/playbooks/proofchain-incident"
|
||||
|
||||
# ============================================
|
||||
# CRITICAL - Receipt Hash Mismatch
|
||||
# ============================================
|
||||
- alert: ReceiptHashMismatch
|
||||
expr: cloudflare_receipt_hash_valid == 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
component: proofchain
|
||||
security_incident: "true"
|
||||
annotations:
|
||||
summary: "Receipt hash mismatch detected"
|
||||
description: |
|
||||
Receipt {{ $labels.receipt_id }} ({{ $labels.receipt_type }})
|
||||
hash does not match stored value.
|
||||
|
||||
This receipt may have been modified after creation.
|
||||
Investigate for potential tampering.
|
||||
runbook_url: "https://wiki.internal/playbooks/proofchain-incident"
|
||||
|
||||
# ============================================
|
||||
# CRITICAL - Anchor Missing
|
||||
# ============================================
|
||||
- alert: ProofchainAnchorMissing
|
||||
expr: cloudflare_proofchain_anchor_age_hours > 24
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
component: proofchain
|
||||
annotations:
|
||||
summary: "Proofchain anchor overdue"
|
||||
description: |
|
||||
No proofchain anchor has been created in {{ $value | humanize }} hours.
|
||||
Anchors should be created at least daily.
|
||||
|
||||
This weakens the audit trail's immutability guarantees.
|
||||
runbook_url: "https://wiki.internal/playbooks/proofchain-maintenance"
|
||||
|
||||
# ============================================
|
||||
# WARNING - Receipt Generation Failed
|
||||
# ============================================
|
||||
- alert: ReceiptGenerationFailed
|
||||
expr: increase(cloudflare_receipt_generation_failures_total[1h]) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
component: proofchain
|
||||
annotations:
|
||||
summary: "Receipt generation failures detected"
|
||||
description: |
|
||||
{{ $value }} receipt generation failures in the last hour.
|
||||
Receipt type: {{ $labels.receipt_type }}
|
||||
Error: {{ $labels.error_type }}
|
||||
|
||||
Operations are proceeding but not being properly logged.
|
||||
|
||||
# ============================================
|
||||
# WARNING - Chain Growth Stalled
|
||||
# ============================================
|
||||
# Fires when a chain's receipt counter has not increased over a 6-hour window.
# The 6h look-back already lives in the expression, so `for` only needs to be a
# short confirmation delay; a `for: 6h` on top of the 6h window would postpone
# the alert to roughly 12 hours without receipts, contradicting the summary.
- alert: ProofchainGrowthStalled
  expr: increase(cloudflare_proofchain_receipts_total[6h]) == 0
  for: 5m
  labels:
    severity: warning
    component: proofchain
  annotations:
    summary: "No new receipts in 6 hours"
    description: |
      Proofchain {{ $labels.chain_name }} has not received new receipts
      in 6 hours. This may indicate:
      - Receipt generation failure
      - System not operational
      - Configuration issue

      Verify receipt generation is working.
|
||||
|
||||
# ============================================
|
||||
# WARNING - Chain Drift from Root
|
||||
# ============================================
|
||||
- alert: ProofchainDrift
|
||||
expr: cloudflare_proofchain_drift_receipts > 100
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
component: proofchain
|
||||
annotations:
|
||||
summary: "Proofchain has {{ $value }} unanchored receipts"
|
||||
description: |
|
||||
Chain {{ $labels.chain_name }} has {{ $value }} receipts since
|
||||
the last anchor. Consider creating a new anchor to checkpoint
|
||||
the current state.
|
||||
|
||||
# ============================================
|
||||
# INFO - Anchor Created
|
||||
# ============================================
|
||||
- alert: ProofchainAnchorCreated
|
||||
expr: changes(cloudflare_proofchain_anchor_count[1h]) > 0
|
||||
for: 0m
|
||||
labels:
|
||||
severity: info
|
||||
component: proofchain
|
||||
annotations:
|
||||
summary: "New proofchain anchor created"
|
||||
description: |
|
||||
A new anchor has been created for chain {{ $labels.chain_name }}.
|
||||
Anchor hash: {{ $labels.anchor_hash }}
|
||||
Receipts anchored: {{ $labels.receipts_anchored }}
|
||||
|
||||
# ============================================
|
||||
# CRITICAL - Frontier Corruption
|
||||
# ============================================
|
||||
- alert: ProofchainFrontierCorrupt
|
||||
expr: cloudflare_proofchain_frontier_valid == 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
component: proofchain
|
||||
annotations:
|
||||
summary: "Proofchain frontier is corrupt"
|
||||
description: |
|
||||
The frontier (latest state) of chain {{ $labels.chain_name }}
|
||||
cannot be verified. The chain may be in an inconsistent state.
|
||||
|
||||
Do not append new receipts until this is resolved.
|
||||
runbook_url: "https://wiki.internal/playbooks/proofchain-incident"
|
||||
|
||||
# ============================================
|
||||
# WARNING - Receipt Backlog
|
||||
# ============================================
|
||||
- alert: ReceiptBacklog
|
||||
expr: cloudflare_receipt_queue_depth > 100
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
component: proofchain
|
||||
annotations:
|
||||
summary: "Receipt generation backlog"
|
||||
description: |
|
||||
{{ $value }} receipts waiting to be written.
|
||||
This may indicate performance issues or blocked writes.
|
||||
|
||||
# ============================================
|
||||
# CRITICAL - Receipt Queue Overflow
|
||||
# ============================================
|
||||
- alert: ReceiptQueueOverflow
|
||||
expr: cloudflare_receipt_queue_depth > 1000
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
component: proofchain
|
||||
annotations:
|
||||
summary: "Receipt queue overflow imminent"
|
||||
description: |
|
||||
{{ $value }} receipts in queue. Queue may overflow.
|
||||
Some operational events may not be recorded.
|
||||
Investigate and resolve immediately.
|
||||
|
||||
# ============================================
|
||||
# WARNING - Receipt Write Latency High
|
||||
# ============================================
|
||||
- alert: ReceiptWriteLatencyHigh
|
||||
expr: cloudflare_receipt_write_duration_seconds > 5
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
component: proofchain
|
||||
annotations:
|
||||
summary: "High receipt write latency"
|
||||
description: |
|
||||
Receipt write operations taking {{ $value | humanize }}s.
|
||||
This may cause backlog buildup.
|
||||
Check storage performance.
|
||||
|
||||
# ============================================
|
||||
# CRITICAL - Storage Near Capacity
|
||||
# ============================================
|
||||
- alert: ProofchainStorageNearFull
|
||||
expr: cloudflare_proofchain_storage_used_bytes / cloudflare_proofchain_storage_total_bytes > 0.9
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
component: proofchain
|
||||
annotations:
|
||||
summary: "Proofchain storage >90% full"
|
||||
description: |
|
||||
Proofchain storage is {{ $value | humanizePercentage }} full.
|
||||
Expand storage or archive old receipts immediately.
|
||||
|
||||
# ============================================
|
||||
# WARNING - Cross-Ledger Verification Failed
|
||||
# ============================================
|
||||
- alert: CrossLedgerVerificationFailed
|
||||
expr: cloudflare_proofchain_cross_verification_valid == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
component: proofchain
|
||||
annotations:
|
||||
summary: "Cross-ledger verification failed"
|
||||
description: |
|
||||
Verification between {{ $labels.chain_a }} and {{ $labels.chain_b }}
|
||||
has failed. The ledgers may have diverged.
|
||||
|
||||
Investigate the root cause before proceeding.
|
||||
|
||||
# ============================================
|
||||
# INFO - Receipt Type Distribution Anomaly
|
||||
# ============================================
|
||||
# Fires when anomaly-type receipts make up more than half of all receipts
# written over the last hour, sustained for one hour.
# Both sides are aggregated with `sum without (type)`: a plain division matches
# series one-to-one on their full label set, so the type="anomaly" series would
# only ever be divided by itself (ratio 1) and the alert would fire whenever
# any anomaly receipt exists. Dropping `type` makes the denominator the rate
# across all receipt types while keeping every other label for matching.
- alert: ReceiptDistributionAnomaly
  expr: |
    (
      sum without (type) (rate(cloudflare_receipts_by_type_total{type="anomaly"}[1h]))
      /
      sum without (type) (rate(cloudflare_receipts_by_type_total[1h]))
    ) > 0.5
  for: 1h
  labels:
    severity: info
    component: proofchain
  annotations:
    summary: "High proportion of anomaly receipts"
    description: |
      More than 50% of recent receipts are anomaly type.
      This may indicate systemic issues being logged.
      Review recent anomaly receipts for patterns.
|
||||
210
observatory/prometheus/alerts/tunnel-alerts.yml
Normal file
210
observatory/prometheus/alerts/tunnel-alerts.yml
Normal file
@@ -0,0 +1,210 @@
|
||||
# Tunnel Alert Rules for Cloudflare Mesh Observatory
|
||||
# Phase 5B - Alerts & Escalation
|
||||
|
||||
groups:
|
||||
- name: tunnel_alerts
|
||||
interval: 30s
|
||||
rules:
|
||||
# ============================================
|
||||
# CRITICAL - Tunnel Down
|
||||
# ============================================
|
||||
- alert: TunnelDown
  # Per-tunnel check: the status gauge has read 0 for 2 minutes.
  expr: cloudflare_tunnel_status == 0
  for: 2m
  labels:
    severity: critical
    component: tunnel
    playbook: tunnel-rotation
  annotations:
    summary: "Cloudflare Tunnel {{ $labels.tunnel_name }} is DOWN"
    description: |
      Tunnel {{ $labels.tunnel_name }} (ID: {{ $labels.tunnel_id }}) has been
      unreachable for more than 2 minutes. Services behind this tunnel are
      likely unreachable.
    impact: "Services behind tunnel are unreachable from the internet"
    runbook_url: "https://wiki.internal/playbooks/tunnel-rotation"
|
||||
|
||||
# ============================================
|
||||
# CRITICAL - All Tunnels Down
|
||||
# ============================================
|
||||
- alert: AllTunnelsDown
  # Fires when at least one tunnel series exists and none of them
  # reports status 1.
  #
  # The previous form `count(cloudflare_tunnel_status == 1) == 0` could
  # never fire: when no series passes the `== 1` filter, count() of the
  # empty vector yields no sample at all (not 0), so the comparison has
  # nothing to evaluate. `unless` returns the left-hand side (the number
  # of known tunnels) only when the right-hand side is empty, i.e. when
  # no healthy tunnel exists. If the metric is absent entirely, the
  # left-hand side is empty too and the alert stays silent.
  expr: count(cloudflare_tunnel_status) unless count(cloudflare_tunnel_status == 1)
  for: 1m
  labels:
    severity: critical
    component: tunnel
    playbook: tunnel-rotation
  annotations:
    summary: "ALL Cloudflare Tunnels are DOWN"
    description: |
      No healthy tunnels detected. Complete loss of tunnel connectivity.
      This is a P0 incident requiring immediate attention.
    impact: "Complete loss of external connectivity via tunnels"
    runbook_url: "https://wiki.internal/playbooks/tunnel-rotation"
|
||||
|
||||
# ============================================
|
||||
# WARNING - Tunnel Degraded
|
||||
# ============================================
|
||||
- alert: TunnelDegraded
  # Per-tunnel check: fewer than 2 connections sustained for 5 minutes.
  expr: cloudflare_tunnel_connections < 2
  for: 5m
  labels:
    severity: warning
    component: tunnel
  annotations:
    summary: "Tunnel {{ $labels.tunnel_name }} has reduced connections"
    description: |
      Tunnel {{ $labels.tunnel_name }} has fewer than 2 active connections.
      This may indicate network issues or cloudflared problems.
    runbook_url: "https://wiki.internal/playbooks/tunnel-rotation"
|
||||
|
||||
# ============================================
|
||||
# WARNING - Tunnel Rotation Due
|
||||
# ============================================
|
||||
- alert: TunnelRotationDue
  # Tunnel age in seconds (now minus creation timestamp) compared with
  # 30 days expressed as 86400 * 30 seconds.
  expr: (time() - cloudflare_tunnel_created_timestamp) > (86400 * 30)
  for: 1h
  labels:
    severity: warning
    component: tunnel
    playbook: tunnel-rotation
  annotations:
    summary: "Tunnel {{ $labels.tunnel_name }} rotation is due"
    # $value is the left-hand side of the comparison, i.e. the age in
    # seconds, which humanizeDuration renders as a readable duration.
    description: |
      Tunnel {{ $labels.tunnel_name }} was created more than 30 days ago.
      Per security policy, tunnels should be rotated monthly.
      Age: {{ $value | humanizeDuration }}
    runbook_url: "https://wiki.internal/playbooks/tunnel-rotation"
|
||||
|
||||
# ============================================
|
||||
# CRITICAL - Tunnel Rotation Overdue
|
||||
# ============================================
|
||||
- alert: TunnelRotationOverdue
  # Same age computation as the 30-day rotation warning, with the
  # threshold raised to 45 days (86400 * 45 seconds).
  expr: (time() - cloudflare_tunnel_created_timestamp) > (86400 * 45)
  for: 1h
  labels:
    severity: critical
    component: tunnel
    playbook: tunnel-rotation
  annotations:
    summary: "Tunnel {{ $labels.tunnel_name }} rotation is OVERDUE"
    # $value carries the tunnel age in seconds.
    description: |
      Tunnel {{ $labels.tunnel_name }} is more than 45 days old.
      This exceeds the maximum rotation interval and represents a
      security policy violation.
      Age: {{ $value | humanizeDuration }}
    runbook_url: "https://wiki.internal/playbooks/tunnel-rotation"
|
||||
|
||||
# ============================================
|
||||
# WARNING - Tunnel High Latency
|
||||
# ============================================
|
||||
- alert: TunnelHighLatency
  # Per-tunnel latency gauge above 500 (milliseconds, per the metric
  # name) for 5 minutes.
  expr: cloudflare_tunnel_latency_ms > 500
  for: 5m
  labels:
    severity: warning
    component: tunnel
  annotations:
    summary: "High latency on tunnel {{ $labels.tunnel_name }}"
    description: |
      Tunnel {{ $labels.tunnel_name }} is experiencing latency above 500ms.
      Current latency: {{ $value }}ms
      This may impact user experience.
|
||||
|
||||
# ============================================
|
||||
# CRITICAL - Tunnel Very High Latency
|
||||
# ============================================
|
||||
- alert: TunnelVeryHighLatency
  # Critical tier of the latency check: above 2000 for 2 minutes.
  expr: cloudflare_tunnel_latency_ms > 2000
  for: 2m
  labels:
    severity: critical
    component: tunnel
  annotations:
    summary: "Critical latency on tunnel {{ $labels.tunnel_name }}"
    description: |
      Tunnel {{ $labels.tunnel_name }} latency exceeds 2000ms.
      Current latency: {{ $value }}ms
      Services may be timing out.
|
||||
|
||||
# ============================================
|
||||
# WARNING - Tunnel Error Rate High
|
||||
# ============================================
|
||||
- alert: TunnelHighErrorRate
  # Errors per request over a 5-minute window. `/` binds tighter than
  # `>`, so the ratio is what gets compared against 0.05.
  expr: |
    rate(cloudflare_tunnel_errors_total[5m])
    / rate(cloudflare_tunnel_requests_total[5m]) > 0.05
  for: 5m
  labels:
    severity: warning
    component: tunnel
  annotations:
    summary: "High error rate on tunnel {{ $labels.tunnel_name }}"
    # $value is the error ratio, rendered as a percentage.
    description: |
      Tunnel {{ $labels.tunnel_name }} error rate exceeds 5%.
      Current error rate: {{ $value | humanizePercentage }}
|
||||
|
||||
# ============================================
|
||||
# CRITICAL - Tunnel Error Rate Critical
|
||||
# ============================================
|
||||
- alert: TunnelCriticalErrorRate
  # Critical tier of the error-ratio check: above 0.20 for 2 minutes.
  expr: |
    rate(cloudflare_tunnel_errors_total[5m])
    / rate(cloudflare_tunnel_requests_total[5m]) > 0.20
  for: 2m
  labels:
    severity: critical
    component: tunnel
  annotations:
    summary: "Critical error rate on tunnel {{ $labels.tunnel_name }}"
    # $value is the error ratio, rendered as a percentage.
    description: |
      Tunnel {{ $labels.tunnel_name }} error rate exceeds 20%.
      Current error rate: {{ $value | humanizePercentage }}
      This indicates severe connectivity issues.
|
||||
|
||||
# ============================================
|
||||
# INFO - Tunnel Configuration Changed
|
||||
# ============================================
|
||||
- alert: TunnelConfigChanged
  # changes() counts value changes of the config-hash series inside the
  # trailing hour; `for: 0m` fires on the first evaluation that sees one.
  expr: changes(cloudflare_tunnel_config_hash[1h]) > 0
  for: 0m
  labels:
    severity: info
    component: tunnel
  annotations:
    summary: "Tunnel {{ $labels.tunnel_name }} configuration changed"
    description: |
      The configuration for tunnel {{ $labels.tunnel_name }} has changed
      in the last hour. Verify this was an authorized change.
|
||||
|
||||
# ============================================
|
||||
# WARNING - Cloudflared Version Outdated
|
||||
# ============================================
|
||||
- alert: CloudflaredOutdated
  # Version-age gauge above 90 (days, per the metric name), sustained
  # for 24 hours.
  expr: cloudflare_cloudflared_version_age_days > 90
  for: 24h
  labels:
    severity: warning
    component: tunnel
  annotations:
    summary: "cloudflared version is outdated"
    description: |
      The cloudflared binary is more than 90 days old.
      Current version age: {{ $value }} days
      Consider upgrading to latest version for security patches.
|
||||
|
||||
# ============================================
|
||||
# WARNING - Tunnel Connection Flapping
|
||||
# ============================================
|
||||
- alert: TunnelConnectionFlapping
  # More than 3 status value changes inside the trailing 10-minute
  # window, and that condition must itself hold for 10 minutes.
  expr: changes(cloudflare_tunnel_status[10m]) > 3
  for: 10m
  labels:
    severity: warning
    component: tunnel
  annotations:
    summary: "Tunnel {{ $labels.tunnel_name }} is flapping"
    # $value is the number of status changes counted in the window.
    description: |
      Tunnel {{ $labels.tunnel_name }} has changed state {{ $value }} times
      in the last 10 minutes. This indicates instability.
      Check network connectivity and cloudflared logs.
|
||||
266
observatory/prometheus/alerts/waf-alerts.yml
Normal file
266
observatory/prometheus/alerts/waf-alerts.yml
Normal file
@@ -0,0 +1,266 @@
|
||||
# WAF Alert Rules for Cloudflare Mesh Observatory
|
||||
# Phase 5B - Alerts & Escalation
|
||||
|
||||
groups:
|
||||
- name: waf_alerts
|
||||
interval: 30s
|
||||
rules:
|
||||
# ============================================
|
||||
# CRITICAL - Massive Attack Detected
|
||||
# ============================================
|
||||
- alert: WAFMassiveAttack
  # Per-second block rate (5-minute window) above 1000 for 2 minutes.
  # The comparison is evaluated per series of the counter.
  expr: |
    rate(cloudflare_waf_blocked_requests_total[5m]) > 1000
  for: 2m
  labels:
    severity: critical
    component: waf
    playbook: waf-incident
  annotations:
    summary: "Massive attack detected - {{ $value | humanize }} blocks/sec"
    description: |
      WAF is blocking more than 1000 requests per second.
      This indicates a significant attack in progress.

      Consider enabling Under Attack Mode if not already active.
    impact: "Potential service degradation under attack load"
    current_mitigation: "WAF blocking enabled"
    runbook_url: "https://wiki.internal/playbooks/waf-incident"
|
||||
|
||||
# ============================================
|
||||
# CRITICAL - WAF Rule Bypass Detected
|
||||
# ============================================
|
||||
- alert: WAFRuleBypass
  # Fires for each series whose bypass-detected gauge has been 1 for a
  # minute. Tagged as a security incident via the labels below.
  expr: cloudflare_waf_bypass_detected == 1
  for: 1m
  labels:
    severity: critical
    component: waf
    playbook: waf-incident
    # Quoted so the label value is the string "true", not a YAML boolean.
    security_incident: "true"
  annotations:
    summary: "WAF rule bypass detected for rule {{ $labels.rule_id }}"
    description: |
      Malicious traffic matching known attack patterns has bypassed
      WAF rule {{ $labels.rule_id }}.

      Attack type: {{ $labels.attack_type }}
      Bypassed requests: {{ $labels.bypass_count }}

      Review and tighten rule immediately.
    runbook_url: "https://wiki.internal/playbooks/waf-incident"
|
||||
|
||||
# ============================================
|
||||
# WARNING - Attack Spike
|
||||
# ============================================
|
||||
- alert: WAFAttackSpike
  # Current 5-minute block rate versus five times its own 24-hour mean.
  # The baseline is a subquery: the 5m rate sampled every 5 minutes over
  # the last 24 hours, then averaged.
  expr: |
    rate(cloudflare_waf_blocked_requests_total[5m])
    > 5 * avg_over_time(rate(cloudflare_waf_blocked_requests_total[5m])[24h:5m])
  for: 5m
  labels:
    severity: warning
    component: waf
  annotations:
    summary: "WAF block rate 5x above normal"
    # $value is the left-hand side: the current per-second block rate.
    description: |
      WAF is blocking significantly more requests than the 24-hour average.
      Current rate: {{ $value | humanize }}/s

      This may indicate an attack or new attack pattern.
|
||||
|
||||
# ============================================
|
||||
# WARNING - SQL Injection Attempts
|
||||
# ============================================
|
||||
- alert: WAFSQLiAttack
  # SQL-injection block rate above 10 per second for 2 minutes.
  expr: rate(cloudflare_waf_sqli_blocks_total[5m]) > 10
  for: 2m
  labels:
    severity: warning
    component: waf
    attack_type: sqli
  annotations:
    summary: "SQL injection attack detected"
    description: |
      WAF is blocking SQL injection attempts at {{ $value | humanize }}/s.
      Source IPs may need to be blocked at firewall level.
|
||||
|
||||
# ============================================
|
||||
# WARNING - XSS Attempts
|
||||
# ============================================
|
||||
- alert: WAFXSSAttack
  # XSS block rate above 10 per second for 2 minutes.
  expr: rate(cloudflare_waf_xss_blocks_total[5m]) > 10
  for: 2m
  labels:
    severity: warning
    component: waf
    attack_type: xss
  annotations:
    summary: "XSS attack detected"
    description: |
      WAF is blocking cross-site scripting attempts at {{ $value | humanize }}/s.
      Review application input validation.
|
||||
|
||||
# ============================================
|
||||
# WARNING - Bot Attack
|
||||
# ============================================
|
||||
- alert: WAFBotAttack
  # Bot block rate above 100 per second for 5 minutes.
  expr: rate(cloudflare_waf_bot_blocks_total[5m]) > 100
  for: 5m
  labels:
    severity: warning
    component: waf
    attack_type: bot
  annotations:
    summary: "High bot traffic detected"
    description: |
      WAF is blocking bot traffic at {{ $value | humanize }}/s.
      Consider enabling Bot Fight Mode or stricter challenges.
|
||||
|
||||
# ============================================
|
||||
# CRITICAL - Rate Limit Exhaustion
|
||||
# ============================================
|
||||
- alert: WAFRateLimitExhausted
  # Fires for each series whose rate-limit-triggered gauge has been 1
  # for a minute; rule, source and count come from the series labels.
  expr: cloudflare_waf_rate_limit_triggered == 1
  for: 1m
  labels:
    severity: critical
    component: waf
  annotations:
    summary: "Rate limit triggered for {{ $labels.rule_name }}"
    description: |
      Rate limiting rule {{ $labels.rule_name }} has been triggered.
      Source: {{ $labels.source_ip }}
      Requests blocked: {{ $labels.blocked_count }}

      Legitimate users may be affected.
|
||||
|
||||
# ============================================
|
||||
# WARNING - WAF Rule Disabled
|
||||
# ============================================
|
||||
- alert: WAFRuleDisabled
  # Per-rule check: the enabled gauge has read 0 for 5 minutes.
  expr: cloudflare_waf_rule_enabled == 0
  for: 5m
  labels:
    severity: warning
    component: waf
  annotations:
    summary: "WAF rule {{ $labels.rule_id }} is disabled"
    description: |
      WAF rule {{ $labels.rule_id }} ({{ $labels.rule_name }}) is currently disabled.
      Verify this is intentional and not a misconfiguration.
|
||||
|
||||
# ============================================
|
||||
# WARNING - WAF Mode Changed
|
||||
# ============================================
|
||||
- alert: WAFModeChanged
  # changes() counts value changes of the mode series inside the
  # trailing hour; `for: 0m` fires on the first evaluation that sees one.
  # NOTE(review): the description reads the mode from a `mode` label,
  # while changes() only observes the sample value - confirm how the
  # exporter encodes the mode.
  expr: changes(cloudflare_waf_mode[1h]) > 0
  for: 0m
  labels:
    severity: warning
    component: waf
  annotations:
    summary: "WAF mode changed for zone {{ $labels.zone }}"
    description: |
      WAF operation mode has changed in the last hour.
      New mode: {{ $labels.mode }}
      Verify this was an authorized change.
|
||||
|
||||
# ============================================
|
||||
# INFO - Under Attack Mode Active
|
||||
# ============================================
|
||||
- alert: UnderAttackModeActive
  # Informational: fires immediately (`for: 0m`) for each zone whose
  # under-attack gauge reads 1.
  expr: cloudflare_zone_under_attack == 1
  for: 0m
  labels:
    severity: info
    component: waf
  annotations:
    summary: "Under Attack Mode is ACTIVE for {{ $labels.zone }}"
    description: |
      Under Attack Mode is currently enabled for zone {{ $labels.zone }}.
      This adds a JavaScript challenge to all visitors.
      Remember to disable when attack subsides.
|
||||
|
||||
# ============================================
|
||||
# WARNING - Under Attack Mode Extended
|
||||
# ============================================
|
||||
- alert: UnderAttackModeExtended
  # Same condition as the informational under-attack rule, but it only
  # fires once the gauge has read 1 continuously for 2 hours.
  expr: cloudflare_zone_under_attack == 1
  for: 2h
  labels:
    severity: warning
    component: waf
  annotations:
    summary: "Under Attack Mode active for 2+ hours"
    description: |
      Under Attack Mode has been active for {{ $labels.zone }} for more
      than 2 hours. Verify it's still needed as it impacts user experience.
|
||||
|
||||
# ============================================
|
||||
# CRITICAL - WAF Completely Disabled
|
||||
# ============================================
|
||||
- alert: WAFDisabled
  # Per-zone check: the WAF-enabled gauge has read 0 for 5 minutes.
  expr: cloudflare_waf_enabled == 0
  for: 5m
  labels:
    severity: critical
    component: waf
  annotations:
    summary: "WAF is DISABLED for zone {{ $labels.zone }}"
    description: |
      The Web Application Firewall is completely disabled for {{ $labels.zone }}.
      This leaves the zone unprotected against application-layer attacks.

      Enable immediately unless there's a documented exception.
|
||||
|
||||
# ============================================
|
||||
# INFO - Low WAF Efficacy
|
||||
# ============================================
|
||||
- alert: WAFLowEfficacy
  # Fraction of analyzed requests that were blocked during the last hour.
  #
  # Both metrics are `_total` counters, so they are wrapped in rate()
  # before dividing. Dividing the raw counter values gives the ratio
  # accumulated since the exporter started, which barely moves with
  # recent traffic and jumps when either counter resets. When no
  # requests were analyzed the division yields NaN, the comparison is
  # false, and the alert stays silent.
  expr: |
    rate(cloudflare_waf_blocked_requests_total[1h])
    / rate(cloudflare_waf_analyzed_requests_total[1h]) < 0.001
  for: 1h
  labels:
    severity: info
    component: waf
  annotations:
    summary: "Low WAF block rate for {{ $labels.zone }}"
    description: |
      WAF is blocking very few requests (< 0.1%).
      This might indicate rules are too permissive or
      the zone is not receiving attack traffic.
|
||||
|
||||
# ============================================
|
||||
# WARNING - Firewall Rule Missing
|
||||
# ============================================
|
||||
- alert: FirewallRuleMissing
  # Per-rule check: the critical-rule-exists gauge has read 0 for
  # 5 minutes.
  expr: cloudflare_firewall_critical_rule_exists == 0
  for: 5m
  labels:
    severity: warning
    component: waf
  annotations:
    summary: "Critical firewall rule missing: {{ $labels.rule_name }}"
    description: |
      Expected firewall rule {{ $labels.rule_name }} is not configured.
      This rule is marked as critical in the WAF baseline.
|
||||
|
||||
# ============================================
|
||||
# WARNING - High False Positive Rate
|
||||
# ============================================
|
||||
- alert: WAFHighFalsePositives
  # False positives per blocked request over a 1-hour window; the ratio
  # must stay above 0.1 for a further hour before the alert fires.
  expr: |
    rate(cloudflare_waf_false_positives_total[1h])
    / rate(cloudflare_waf_blocked_requests_total[1h]) > 0.1
  for: 1h
  labels:
    severity: warning
    component: waf
  annotations:
    summary: "High WAF false positive rate"
    # $value is the false-positive ratio, rendered as a percentage.
    description: |
      WAF false positive rate exceeds 10%.
      Current rate: {{ $value | humanizePercentage }}
      Review and tune rules to reduce legitimate traffic blocking.
|
||||
Reference in New Issue
Block a user