Initialize repository snapshot
This commit is contained in:
20
vaultmesh-observability/Cargo.toml
Normal file
20
vaultmesh-observability/Cargo.toml
Normal file
@@ -0,0 +1,20 @@
|
||||
[package]
|
||||
name = "vaultmesh-observability"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[[bin]]
|
||||
name = "vaultmesh-observability"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
vaultmesh-core = { path = "../vaultmesh-core" }
|
||||
prometheus = "0.13"
|
||||
tokio = { version = "1.28", features = ["rt-multi-thread", "macros", "time"] }
|
||||
hyper = { version = "0.14", features = ["full"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
reqwest = { version = "0.11", features = ["json", "rustls-tls"] }
|
||||
tokio = { version = "1.28", features = ["rt-multi-thread", "macros", "time"] }
|
||||
23
vaultmesh-observability/Dockerfile
Normal file
23
vaultmesh-observability/Dockerfile
Normal file
@@ -0,0 +1,23 @@
|
||||
FROM rust:1.75 as builder
|
||||
|
||||
WORKDIR /usr/src/vaultmesh
|
||||
|
||||
# Copy workspace Cargo files
|
||||
COPY Cargo.toml Cargo.lock ./
|
||||
COPY vaultmesh-core ./vaultmesh-core
|
||||
COPY vaultmesh-observability ./vaultmesh-observability
|
||||
|
||||
# Build release binary
|
||||
RUN cargo build --release --package vaultmesh-observability
|
||||
|
||||
FROM debian:bookworm-slim
|
||||
|
||||
RUN apt-get update && apt-get install -y ca-certificates && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY --from=builder /usr/src/vaultmesh/target/release/vaultmesh-observability /usr/local/bin/vaultmesh-observability
|
||||
|
||||
EXPOSE 9108
|
||||
|
||||
USER 1000
|
||||
|
||||
ENTRYPOINT ["/usr/local/bin/vaultmesh-observability"]
|
||||
208
vaultmesh-observability/src/lib.rs
Normal file
208
vaultmesh-observability/src/lib.rs
Normal file
@@ -0,0 +1,208 @@
|
||||
//! vaultmesh-observability
|
||||
//! ObservabilityEngine: Prometheus exporter that exposes VaultMesh receipts metrics.
|
||||
|
||||
use prometheus::{
|
||||
Encoder, Gauge, HistogramOpts, HistogramVec, IntCounterVec, Opts, Registry, TextEncoder,
|
||||
};
|
||||
use std::convert::Infallible;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
|
||||
use hyper::service::{make_service_fn, service_fn};
|
||||
use hyper::{Body, Request, Response, Server};
|
||||
|
||||
/// Schema version for observability receipts
|
||||
pub const SCHEMA_VERSION: &str = "2.0.0";
|
||||
|
||||
/// ObservabilityEngine - Prometheus metrics exporter for VaultMesh
|
||||
#[derive(Clone)]
|
||||
pub struct ObservabilityEngine {
|
||||
registry: Registry,
|
||||
receipts_counter: IntCounterVec,
|
||||
receipts_failed: IntCounterVec,
|
||||
anchor_age: Gauge,
|
||||
emit_latency: HistogramVec,
|
||||
}
|
||||
|
||||
impl ObservabilityEngine {
|
||||
/// Create a new ObservabilityEngine with all metrics registered
|
||||
pub fn new() -> Self {
|
||||
let registry = Registry::new();
|
||||
|
||||
let receipts_opts =
|
||||
Opts::new("vaultmesh_receipts_total", "Number of receipts emitted by module");
|
||||
let receipts_counter =
|
||||
IntCounterVec::new(receipts_opts, &["module"]).expect("receipts counter");
|
||||
registry
|
||||
.register(Box::new(receipts_counter.clone()))
|
||||
.unwrap();
|
||||
|
||||
let failed_opts = Opts::new(
|
||||
"vaultmesh_receipts_failed_total",
|
||||
"Number of failed receipt emissions",
|
||||
);
|
||||
let receipts_failed =
|
||||
IntCounterVec::new(failed_opts, &["module", "reason"]).expect("failed counter");
|
||||
registry
|
||||
.register(Box::new(receipts_failed.clone()))
|
||||
.unwrap();
|
||||
|
||||
let anchor_age = Gauge::with_opts(Opts::new(
|
||||
"vaultmesh_anchor_age_seconds",
|
||||
"Seconds since last guardian anchor",
|
||||
))
|
||||
.expect("anchor age gauge");
|
||||
registry.register(Box::new(anchor_age.clone())).unwrap();
|
||||
|
||||
let hist_opts = HistogramOpts::new(
|
||||
"vaultmesh_emit_seconds",
|
||||
"Histogram for receipt emit latency in seconds",
|
||||
);
|
||||
let emit_latency = HistogramVec::new(hist_opts, &["module"]).expect("emit latency hist");
|
||||
registry.register(Box::new(emit_latency.clone())).unwrap();
|
||||
|
||||
Self {
|
||||
registry,
|
||||
receipts_counter,
|
||||
receipts_failed,
|
||||
anchor_age,
|
||||
emit_latency,
|
||||
}
|
||||
}
|
||||
|
||||
/// Record a successful receipt emission
|
||||
pub fn observe_emitted(&self, module: &str, latency_secs: f64) {
|
||||
self.receipts_counter.with_label_values(&[module]).inc();
|
||||
self.emit_latency
|
||||
.with_label_values(&[module])
|
||||
.observe(latency_secs);
|
||||
}
|
||||
|
||||
/// Record a failed receipt emission
|
||||
pub fn observe_failed(&self, module: &str, reason: &str) {
|
||||
self.receipts_failed
|
||||
.with_label_values(&[module, reason])
|
||||
.inc();
|
||||
}
|
||||
|
||||
/// Update the anchor age gauge (seconds since epoch of last anchor)
|
||||
pub fn set_anchor_age(&self, secs: f64) {
|
||||
self.anchor_age.set(secs);
|
||||
}
|
||||
|
||||
/// Gather all metrics and return as Prometheus text format
|
||||
pub fn gather_metrics(&self) -> String {
|
||||
let mut buffer = vec![];
|
||||
let encoder = TextEncoder::new();
|
||||
let metric_families = self.registry.gather();
|
||||
encoder.encode(&metric_families, &mut buffer).unwrap();
|
||||
String::from_utf8(buffer).unwrap()
|
||||
}
|
||||
|
||||
/// Start an HTTP server serving /metrics and /health endpoints
|
||||
pub async fn serve(
|
||||
self: Arc<Self>,
|
||||
addr: &SocketAddr,
|
||||
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||
let registry_self = self.clone();
|
||||
let make_service = make_service_fn(move |_conn| {
|
||||
let inner = registry_self.clone();
|
||||
async move {
|
||||
Ok::<_, Infallible>(service_fn(move |req: Request<Body>| {
|
||||
let inner_clone = inner.clone();
|
||||
async move {
|
||||
match (req.method().as_str(), req.uri().path()) {
|
||||
("GET", "/metrics") => {
|
||||
let body = inner_clone.gather_metrics();
|
||||
Ok::<_, Infallible>(Response::new(Body::from(body)))
|
||||
}
|
||||
("GET", "/health") => {
|
||||
Ok::<_, Infallible>(Response::new(Body::from("ok")))
|
||||
}
|
||||
_ => Ok::<_, Infallible>(
|
||||
Response::builder()
|
||||
.status(404)
|
||||
.body(Body::from("not found"))
|
||||
.unwrap(),
|
||||
),
|
||||
}
|
||||
}
|
||||
}))
|
||||
}
|
||||
});
|
||||
|
||||
let server = Server::bind(addr).serve(make_service);
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = server.await {
|
||||
eprintln!("server error: {}", e);
|
||||
}
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ObservabilityEngine {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_engine_creation() {
|
||||
let engine = ObservabilityEngine::new();
|
||||
// Prometheus only outputs metrics after they've been observed
|
||||
// So we need to observe something first
|
||||
engine.observe_emitted("test", 0.01);
|
||||
let metrics = engine.gather_metrics();
|
||||
assert!(metrics.contains("vaultmesh_receipts_total"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_observe_emitted() {
|
||||
let engine = ObservabilityEngine::new();
|
||||
engine.observe_emitted("guardian", 0.05);
|
||||
engine.observe_emitted("guardian", 0.03);
|
||||
engine.observe_emitted("treasury", 0.01);
|
||||
|
||||
let metrics = engine.gather_metrics();
|
||||
assert!(metrics.contains("vaultmesh_receipts_total"));
|
||||
assert!(metrics.contains("guardian"));
|
||||
assert!(metrics.contains("treasury"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_observe_failed() {
|
||||
let engine = ObservabilityEngine::new();
|
||||
engine.observe_failed("mesh", "io_error");
|
||||
|
||||
let metrics = engine.gather_metrics();
|
||||
assert!(metrics.contains("vaultmesh_receipts_failed_total"));
|
||||
assert!(metrics.contains("mesh"));
|
||||
assert!(metrics.contains("io_error"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_anchor_age() {
|
||||
let engine = ObservabilityEngine::new();
|
||||
engine.set_anchor_age(1234.5);
|
||||
|
||||
let metrics = engine.gather_metrics();
|
||||
assert!(metrics.contains("vaultmesh_anchor_age_seconds"));
|
||||
assert!(metrics.contains("1234.5"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_emit_latency_histogram() {
|
||||
let engine = ObservabilityEngine::new();
|
||||
engine.observe_emitted("compliance", 0.001);
|
||||
engine.observe_emitted("compliance", 0.002);
|
||||
|
||||
let metrics = engine.gather_metrics();
|
||||
assert!(metrics.contains("vaultmesh_emit_seconds"));
|
||||
assert!(metrics.contains("compliance"));
|
||||
}
|
||||
}
|
||||
55
vaultmesh-observability/src/main.rs
Normal file
55
vaultmesh-observability/src/main.rs
Normal file
@@ -0,0 +1,55 @@
|
||||
//! VaultMesh Observability Exporter
|
||||
//!
|
||||
//! HTTP server exposing Prometheus metrics at :9108/metrics
|
||||
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use vaultmesh_observability::ObservabilityEngine;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
// Default listen address
|
||||
let addr: SocketAddr = std::env::var("VAULTMESH_METRICS_ADDR")
|
||||
.unwrap_or_else(|_| "0.0.0.0:9108".to_string())
|
||||
.parse()
|
||||
.expect("Invalid address");
|
||||
|
||||
let engine = Arc::new(ObservabilityEngine::new());
|
||||
|
||||
// Start HTTP server
|
||||
engine
|
||||
.clone()
|
||||
.serve(&addr)
|
||||
.await
|
||||
.expect("Failed to start server");
|
||||
|
||||
println!("vaultmesh-observability exporter listening on http://{}", addr);
|
||||
println!(" /metrics - Prometheus metrics");
|
||||
println!(" /health - Health check");
|
||||
|
||||
// Example: demo background emitter (in production, hooks call engine.observe_emitted)
|
||||
let demo = engine.clone();
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
// Simulate some metrics updates
|
||||
demo.observe_emitted("guardian", 0.02);
|
||||
demo.observe_emitted("treasury", 0.01);
|
||||
|
||||
// In real usage, anchor_age would be computed from last anchor timestamp
|
||||
let now = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs() as f64;
|
||||
demo.set_anchor_age(now);
|
||||
|
||||
tokio::time::sleep(Duration::from_secs(5)).await;
|
||||
}
|
||||
});
|
||||
|
||||
// Keep main alive
|
||||
loop {
|
||||
tokio::time::sleep(Duration::from_secs(3600)).await;
|
||||
}
|
||||
}
|
||||
85
vaultmesh-observability/tests/smoketest.rs
Normal file
85
vaultmesh-observability/tests/smoketest.rs
Normal file
@@ -0,0 +1,85 @@
|
||||
//! Smoke test for the observability exporter HTTP server
|
||||
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use tokio::time::{sleep, Duration};
|
||||
use vaultmesh_observability::ObservabilityEngine;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_metrics_endpoint_returns_200() {
|
||||
let engine = Arc::new(ObservabilityEngine::new());
|
||||
let addr: SocketAddr = "127.0.0.1:19108".parse().unwrap();
|
||||
|
||||
// Prometheus only outputs metrics after they've been observed
|
||||
engine.observe_emitted("test", 0.01);
|
||||
engine.set_anchor_age(1700000000.0);
|
||||
|
||||
engine.clone().serve(&addr).await.expect("serve failed");
|
||||
|
||||
// Give the server time to start
|
||||
sleep(Duration::from_millis(100)).await;
|
||||
|
||||
// Request /metrics
|
||||
let resp = reqwest::get("http://127.0.0.1:19108/metrics")
|
||||
.await
|
||||
.expect("request failed");
|
||||
|
||||
assert!(resp.status().is_success(), "Expected 200 OK");
|
||||
|
||||
let body = resp.text().await.expect("body read failed");
|
||||
assert!(
|
||||
body.contains("vaultmesh_receipts_total"),
|
||||
"Expected vaultmesh_receipts_total metric"
|
||||
);
|
||||
assert!(
|
||||
body.contains("vaultmesh_anchor_age_seconds"),
|
||||
"Expected vaultmesh_anchor_age_seconds metric"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_health_endpoint() {
|
||||
let engine = Arc::new(ObservabilityEngine::new());
|
||||
let addr: SocketAddr = "127.0.0.1:19109".parse().unwrap();
|
||||
|
||||
engine.clone().serve(&addr).await.expect("serve failed");
|
||||
sleep(Duration::from_millis(100)).await;
|
||||
|
||||
let resp = reqwest::get("http://127.0.0.1:19109/health")
|
||||
.await
|
||||
.expect("request failed");
|
||||
|
||||
assert!(resp.status().is_success());
|
||||
let body = resp.text().await.unwrap();
|
||||
assert_eq!(body, "ok");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_metrics_after_observations() {
|
||||
let engine = Arc::new(ObservabilityEngine::new());
|
||||
let addr: SocketAddr = "127.0.0.1:19110".parse().unwrap();
|
||||
|
||||
// Record some metrics before starting server
|
||||
engine.observe_emitted("guardian", 0.05);
|
||||
engine.observe_emitted("treasury", 0.02);
|
||||
engine.observe_failed("mesh", "timeout");
|
||||
engine.set_anchor_age(1700000000.0);
|
||||
|
||||
engine.clone().serve(&addr).await.expect("serve failed");
|
||||
sleep(Duration::from_millis(100)).await;
|
||||
|
||||
let resp = reqwest::get("http://127.0.0.1:19110/metrics")
|
||||
.await
|
||||
.expect("request failed");
|
||||
let body = resp.text().await.unwrap();
|
||||
|
||||
// Check that our recorded metrics appear
|
||||
assert!(body.contains("guardian"), "Expected guardian label");
|
||||
assert!(body.contains("treasury"), "Expected treasury label");
|
||||
assert!(body.contains("mesh"), "Expected mesh label");
|
||||
assert!(body.contains("timeout"), "Expected timeout reason");
|
||||
assert!(
|
||||
body.contains("1.7e+09") || body.contains("1700000000"),
|
||||
"Expected anchor age value"
|
||||
);
|
||||
}
|
||||
Reference in New Issue
Block a user