---
# Tunnel Alert Rules for Cloudflare Mesh Observatory
# Phase 5B - Alerts & Escalation
#
# Prometheus alerting rules for Cloudflare tunnels. Each rule carries a
# `severity` label (critical / warning / info) and `component: tunnel`;
# rules that have a remediation playbook also carry a `playbook` label
# and a `runbook_url` annotation.

groups:
  - name: tunnel_alerts
    # Evaluate every rule in this group every 30 seconds.
    interval: 30s
    rules:
      # ============================================
      # CRITICAL - Tunnel Down
      # ============================================
      # Fires per tunnel once its status metric has been 0 for 2 minutes.
      - alert: TunnelDown
        expr: cloudflare_tunnel_status == 0
        for: 2m
        labels:
          severity: critical
          component: tunnel
          playbook: tunnel-rotation
        annotations:
          summary: "Cloudflare Tunnel {{ $labels.tunnel_name }} is DOWN"
          description: |
            Tunnel {{ $labels.tunnel_name }} (ID: {{ $labels.tunnel_id }}) has been
            unreachable for more than 2 minutes. Services behind this tunnel are
            likely unreachable.
          impact: "Services behind tunnel are unreachable from the internet"
          runbook_url: "https://wiki.internal/playbooks/tunnel-rotation"

      # ============================================
      # CRITICAL - All Tunnels Down
      # ============================================
      # Fires when no tunnel reports status == 1 for 1 minute.
      #
      # count() over an empty instant vector yields no sample at all rather
      # than 0, so a bare `count(...) == 0` can never be true. `or vector(0)`
      # substitutes 0 when nothing matches. As a consequence this also fires
      # when the status metric is missing altogether.
      - alert: AllTunnelsDown
        expr: (count(cloudflare_tunnel_status == 1) or vector(0)) == 0
        for: 1m
        labels:
          severity: critical
          component: tunnel
          playbook: tunnel-rotation
        annotations:
          summary: "ALL Cloudflare Tunnels are DOWN"
          description: |
            No healthy tunnels detected. Complete loss of tunnel connectivity.
            This is a P0 incident requiring immediate attention.
          impact: "Complete loss of external connectivity via tunnels"
          runbook_url: "https://wiki.internal/playbooks/tunnel-rotation"

      # ============================================
      # WARNING - Tunnel Degraded
      # ============================================
      # Fires per tunnel when it has had fewer than 2 connections for 5 minutes.
      - alert: TunnelDegraded
        expr: cloudflare_tunnel_connections < 2
        for: 5m
        labels:
          severity: warning
          component: tunnel
        annotations:
          summary: "Tunnel {{ $labels.tunnel_name }} has reduced connections"
          description: |
            Tunnel {{ $labels.tunnel_name }} has fewer than 2 active connections.
            This may indicate network issues or cloudflared problems.
          runbook_url: "https://wiki.internal/playbooks/tunnel-rotation"

      # ============================================
      # WARNING - Tunnel Rotation Due
      # ============================================
      # Tunnel age (now minus creation timestamp, in seconds) above 30 days.
      # $value is that age in seconds, rendered with humanizeDuration.
      - alert: TunnelRotationDue
        expr: (time() - cloudflare_tunnel_created_timestamp) > (86400 * 30)
        for: 1h
        labels:
          severity: warning
          component: tunnel
          playbook: tunnel-rotation
        annotations:
          summary: "Tunnel {{ $labels.tunnel_name }} rotation is due"
          description: |
            Tunnel {{ $labels.tunnel_name }} was created more than 30 days ago.
            Per security policy, tunnels should be rotated monthly.
            Age: {{ $value | humanizeDuration }}
          runbook_url: "https://wiki.internal/playbooks/tunnel-rotation"

      # ============================================
      # CRITICAL - Tunnel Rotation Overdue
      # ============================================
      # Same age computation as TunnelRotationDue with a 45-day threshold.
      - alert: TunnelRotationOverdue
        expr: (time() - cloudflare_tunnel_created_timestamp) > (86400 * 45)
        for: 1h
        labels:
          severity: critical
          component: tunnel
          playbook: tunnel-rotation
        annotations:
          summary: "Tunnel {{ $labels.tunnel_name }} rotation is OVERDUE"
          description: |
            Tunnel {{ $labels.tunnel_name }} is more than 45 days old.
            This exceeds the maximum rotation interval and represents a
            security policy violation.
            Age: {{ $value | humanizeDuration }}
          runbook_url: "https://wiki.internal/playbooks/tunnel-rotation"

      # ============================================
      # WARNING - Tunnel High Latency
      # ============================================
      # Fires per tunnel when the latency metric stays above 500 for 5 minutes.
      - alert: TunnelHighLatency
        expr: cloudflare_tunnel_latency_ms > 500
        for: 5m
        labels:
          severity: warning
          component: tunnel
        annotations:
          summary: "High latency on tunnel {{ $labels.tunnel_name }}"
          description: |
            Tunnel {{ $labels.tunnel_name }} is experiencing latency above 500ms.
            Current latency: {{ $value }}ms
            This may impact user experience.

      # ============================================
      # CRITICAL - Tunnel Very High Latency
      # ============================================
      # Fires per tunnel when the latency metric stays above 2000 for 2 minutes.
      - alert: TunnelVeryHighLatency
        expr: cloudflare_tunnel_latency_ms > 2000
        for: 2m
        labels:
          severity: critical
          component: tunnel
        annotations:
          summary: "Critical latency on tunnel {{ $labels.tunnel_name }}"
          description: |
            Tunnel {{ $labels.tunnel_name }} latency exceeds 2000ms.
            Current latency: {{ $value }}ms
            Services may be timing out.

      # ============================================
      # WARNING - Tunnel Error Rate High
      # ============================================
      # Ratio of the 5-minute error rate to the 5-minute request rate above 5%.
      # $value is that ratio, rendered with humanizePercentage.
      - alert: TunnelHighErrorRate
        expr: |
          rate(cloudflare_tunnel_errors_total[5m])
          / rate(cloudflare_tunnel_requests_total[5m])
          > 0.05
        for: 5m
        labels:
          severity: warning
          component: tunnel
        annotations:
          summary: "High error rate on tunnel {{ $labels.tunnel_name }}"
          description: |
            Tunnel {{ $labels.tunnel_name }} error rate exceeds 5%.
            Current error rate: {{ $value | humanizePercentage }}

      # ============================================
      # CRITICAL - Tunnel Error Rate Critical
      # ============================================
      # Same ratio as TunnelHighErrorRate with a 20% threshold and shorter hold.
      - alert: TunnelCriticalErrorRate
        expr: |
          rate(cloudflare_tunnel_errors_total[5m])
          / rate(cloudflare_tunnel_requests_total[5m])
          > 0.20
        for: 2m
        labels:
          severity: critical
          component: tunnel
        annotations:
          summary: "Critical error rate on tunnel {{ $labels.tunnel_name }}"
          description: |
            Tunnel {{ $labels.tunnel_name }} error rate exceeds 20%.
            Current error rate: {{ $value | humanizePercentage }}
            This indicates severe connectivity issues.

      # ============================================
      # INFO - Tunnel Configuration Changed
      # ============================================
      # Fires immediately (for: 0m) when the config hash metric changed value
      # at least once within the last hour.
      - alert: TunnelConfigChanged
        expr: changes(cloudflare_tunnel_config_hash[1h]) > 0
        for: 0m
        labels:
          severity: info
          component: tunnel
        annotations:
          summary: "Tunnel {{ $labels.tunnel_name }} configuration changed"
          description: |
            The configuration for tunnel {{ $labels.tunnel_name }} has changed
            in the last hour.
            Verify this was an authorized change.

      # ============================================
      # WARNING - Cloudflared Version Outdated
      # ============================================
      # Fires when the version-age metric has been above 90 for 24 hours.
      - alert: CloudflaredOutdated
        expr: cloudflare_cloudflared_version_age_days > 90
        for: 24h
        labels:
          severity: warning
          component: tunnel
        annotations:
          summary: "cloudflared version is outdated"
          description: |
            The cloudflared binary is more than 90 days old.
            Current version age: {{ $value }} days
            Consider upgrading to latest version for security patches.

      # ============================================
      # WARNING - Tunnel Connection Flapping
      # ============================================
      # More than 3 status value changes inside a 10-minute window.
      # NOTE(review): combined with `for: 10m`, the condition must keep
      # holding for a further 10 minutes before the alert fires - confirm
      # that delay is intended.
      - alert: TunnelConnectionFlapping
        expr: changes(cloudflare_tunnel_status[10m]) > 3
        for: 10m
        labels:
          severity: warning
          component: tunnel
        annotations:
          summary: "Tunnel {{ $labels.tunnel_name }} is flapping"
          description: |
            Tunnel {{ $labels.tunnel_name }} has changed state {{ $value }} times
            in the last 10 minutes. This indicates instability.
            Check network connectivity and cloudflared logs.