grafos_observe/
cache_metrics.rs1use crate::metrics::{MetricCounter, MetricGauge, MetricHistogram};
9
// Metric name strings for the cache subsystem. Every name sits under the
// `cache/` prefix.
//
// NOTE(review): how each name is bound to a concrete instrument is not visible
// in this file; the pairing with `CacheMetrics` fields is inferred from the
// matching identifiers and should be confirmed at the registration site.

// --- Latency metrics (the `_us` suffix presumably means microseconds) ---

/// Name of the prefill latency metric.
pub const CACHE_PREFILL_LATENCY_US: &str = "cache/prefill_latency_us";

/// Name of the first-token latency metric.
pub const CACHE_FIRST_TOKEN_LATENCY_US: &str = "cache/first_token_latency_us";

/// Name of the decode latency metric.
pub const CACHE_DECODE_LATENCY_US: &str = "cache/decode_latency_us";

// --- Lookup outcome metrics ---

/// Name of the cache hit metric.
pub const CACHE_HIT_TOTAL: &str = "cache/hit_total";

/// Name of the cache miss metric.
pub const CACHE_MISS_TOTAL: &str = "cache/miss_total";

// --- Byte accounting metrics ---

/// Name of the resident bytes metric.
pub const CACHE_RESIDENT_BYTES: &str = "cache/resident_bytes";

/// Name of the spilled bytes metric.
pub const CACHE_SPILL_BYTES_TOTAL: &str = "cache/spill_bytes_total";

/// Name of the warmup bytes metric.
pub const CACHE_WARMUP_BYTES_TOTAL: &str = "cache/warmup_bytes_total";

// --- Lifecycle and placement event metrics ---

/// Name of the attach failure metric.
pub const CACHE_ATTACH_FAILURE_TOTAL: &str = "cache/attach_failure_total";

/// Name of the cache fork metric.
pub const CACHE_FORK_TOTAL: &str = "cache/fork_total";

/// Name of the cache reclaim metric.
pub const CACHE_RECLAIM_TOTAL: &str = "cache/reclaim_total";

/// Name of the "decode far from cache" metric.
pub const CACHE_DECODE_FAR_FROM_CACHE: &str = "cache/decode_far_from_cache";

/// Every cache metric name declared above, listed in declaration order.
///
/// The length is part of the type, so adding a constant without extending
/// this table (or vice versa) requires touching the `12` as well.
pub const ALL_CACHE_METRIC_NAMES: [&str; 12] = [
    CACHE_PREFILL_LATENCY_US,
    CACHE_FIRST_TOKEN_LATENCY_US,
    CACHE_DECODE_LATENCY_US,
    CACHE_HIT_TOTAL,
    CACHE_MISS_TOTAL,
    CACHE_RESIDENT_BYTES,
    CACHE_SPILL_BYTES_TOTAL,
    CACHE_WARMUP_BYTES_TOTAL,
    CACHE_ATTACH_FAILURE_TOTAL,
    CACHE_FORK_TOTAL,
    CACHE_RECLAIM_TOTAL,
    CACHE_DECODE_FAR_FROM_CACHE,
];
65
66pub struct CacheMetrics {
81 pub prefill_latency: MetricHistogram,
83 pub first_token_latency: MetricHistogram,
85 pub decode_latency: MetricHistogram,
87 pub hit_total: MetricCounter,
89 pub miss_total: MetricCounter,
91 pub resident_bytes: MetricGauge,
93 pub spill_bytes_total: MetricCounter,
95 pub warmup_bytes_total: MetricCounter,
97 pub attach_failure_total: MetricCounter,
99 pub fork_total: MetricCounter,
101 pub reclaim_total: MetricCounter,
103 pub decode_far_from_cache: MetricCounter,
105}
106
107impl Default for CacheMetrics {
108 fn default() -> Self {
109 Self::new()
110 }
111}
112
113impl CacheMetrics {
114 pub const fn new() -> Self {
116 Self {
117 prefill_latency: MetricHistogram::new(),
118 first_token_latency: MetricHistogram::new(),
119 decode_latency: MetricHistogram::new(),
120 hit_total: MetricCounter::new(),
121 miss_total: MetricCounter::new(),
122 resident_bytes: MetricGauge::new(),
123 spill_bytes_total: MetricCounter::new(),
124 warmup_bytes_total: MetricCounter::new(),
125 attach_failure_total: MetricCounter::new(),
126 fork_total: MetricCounter::new(),
127 reclaim_total: MetricCounter::new(),
128 decode_far_from_cache: MetricCounter::new(),
129 }
130 }
131
132 pub fn global() -> &'static CacheMetrics {
134 static INSTANCE: CacheMetrics = CacheMetrics::new();
135 &INSTANCE
136 }
137
138 pub fn record_cache_created(&self, logical_bytes: u64) {
144 self.resident_bytes
145 .set(self.resident_bytes.get() + logical_bytes as i64);
146 }
147
148 pub fn record_cache_hit(&self) {
150 self.hit_total.inc();
151 }
152
153 pub fn record_cache_miss(&self) {
155 self.miss_total.inc();
156 }
157
158 pub fn record_attach_failure(&self) {
160 self.attach_failure_total.inc();
161 }
162
163 pub fn record_cache_spill(&self, bytes_moved: u64) {
165 self.spill_bytes_total.add(bytes_moved);
166 }
167
168 pub fn record_cache_warmup(&self, bytes_moved: u64) {
170 self.warmup_bytes_total.add(bytes_moved);
171 }
172
173 pub fn record_cache_reclaimed(&self, logical_bytes: u64) {
176 self.reclaim_total.inc();
177 self.resident_bytes
178 .set(self.resident_bytes.get() - logical_bytes as i64);
179 }
180
181 pub fn record_cache_forked(&self, logical_bytes: u64) {
183 self.fork_total.inc();
184 self.resident_bytes
185 .set(self.resident_bytes.get() + logical_bytes as i64);
186 }
187
188 pub fn record_prefill_latency(&self, latency_us: u64) {
190 self.prefill_latency.observe(latency_us);
191 }
192
193 pub fn record_first_token_latency(&self, latency_us: u64) {
195 self.first_token_latency.observe(latency_us);
196 }
197
198 pub fn record_decode_latency(&self, latency_us: u64) {
200 self.decode_latency.observe(latency_us);
201 }
202
203 pub fn record_decode_far_from_cache(&self) {
205 self.decode_far_from_cache.inc();
206 }
207}
208
#[cfg(test)]
mod tests {
    use super::*;

    /// No two entries of the name table may be equal.
    #[test]
    fn all_metric_names_are_distinct() {
        let table = ALL_CACHE_METRIC_NAMES;
        for (i, earlier) in table.iter().enumerate() {
            // Only compare against entries that come after `i`, so each
            // unordered pair is checked exactly once.
            for (j, later) in table.iter().enumerate().skip(i + 1) {
                assert_ne!(
                    earlier, later,
                    "metric names at indices {} and {} collide: {}",
                    i, j, earlier
                );
            }
        }
    }

    /// Every entry of the name table lives under the `cache/` prefix.
    #[test]
    fn all_metric_names_have_cache_prefix() {
        ALL_CACHE_METRIC_NAMES.iter().for_each(|name| {
            assert!(
                name.starts_with("cache/"),
                "metric name {} does not start with 'cache/'",
                name
            );
        });
    }

    /// The name table holds exactly twelve entries.
    #[test]
    fn exactly_twelve_metrics() {
        let total = ALL_CACHE_METRIC_NAMES.len();
        assert_eq!(total, 12);
    }

    /// Repeated calls to `global()` hand back the same object.
    #[test]
    fn cache_metrics_global_is_singleton() {
        let first: &CacheMetrics = CacheMetrics::global();
        let second: &CacheMetrics = CacheMetrics::global();
        assert!(core::ptr::eq(first, second));
    }

    /// Each creation adds its size to the resident gauge.
    #[test]
    fn record_cache_created_increments_resident_bytes() {
        let metrics = CacheMetrics::new();
        // (bytes recorded, expected running total afterwards)
        for (bytes, running_total) in [(4096, 4096), (1024, 5120)] {
            metrics.record_cache_created(bytes);
            assert_eq!(metrics.resident_bytes.get(), running_total);
        }
    }

    /// Hits and misses are tallied on separate counters.
    #[test]
    fn record_cache_hit_and_miss() {
        let metrics = CacheMetrics::new();
        for _ in 0..2 {
            metrics.record_cache_hit();
        }
        metrics.record_cache_miss();
        assert_eq!(metrics.hit_total.get(), 2);
        assert_eq!(metrics.miss_total.get(), 1);
    }

    /// Spill and warmup byte counts accumulate independently.
    #[test]
    fn record_cache_spill_and_warmup() {
        let metrics = CacheMetrics::new();
        for spilled in [1024, 2048] {
            metrics.record_cache_spill(spilled);
        }
        metrics.record_cache_warmup(512);
        assert_eq!(metrics.spill_bytes_total.get(), 3072);
        assert_eq!(metrics.warmup_bytes_total.get(), 512);
    }

    /// A reclaim lowers the resident gauge and bumps the reclaim counter.
    #[test]
    fn record_cache_reclaimed_decrements_resident() {
        let metrics = CacheMetrics::new();
        metrics.record_cache_created(8192);
        metrics.record_cache_reclaimed(4096);
        let resident_after = metrics.resident_bytes.get();
        let reclaims = metrics.reclaim_total.get();
        assert_eq!(resident_after, 4096);
        assert_eq!(reclaims, 1);
    }

    /// A fork bumps the fork counter and raises the resident gauge.
    #[test]
    fn record_cache_forked() {
        let metrics = CacheMetrics::new();
        metrics.record_cache_forked(2048);
        let forks = metrics.fork_total.get();
        let resident_after = metrics.resident_bytes.get();
        assert_eq!(forks, 1);
        assert_eq!(resident_after, 2048);
    }

    /// Each attach failure is counted once.
    #[test]
    fn record_attach_failure() {
        let metrics = CacheMetrics::new();
        (0..2).for_each(|_| metrics.record_attach_failure());
        assert_eq!(metrics.attach_failure_total.get(), 2);
    }

    /// One observation per histogram yields a count of one on each.
    #[test]
    fn record_latencies() {
        let metrics = CacheMetrics::new();
        metrics.record_prefill_latency(1000);
        metrics.record_first_token_latency(500);
        metrics.record_decode_latency(50);
        let observed = [
            metrics.prefill_latency.count(),
            metrics.first_token_latency.count(),
            metrics.decode_latency.count(),
        ];
        for count in observed {
            assert_eq!(count, 1);
        }
    }

    /// A single far-from-cache decode is counted once.
    #[test]
    fn record_decode_far_from_cache() {
        let metrics = CacheMetrics::new();
        metrics.record_decode_far_from_cache();
        let seen = metrics.decode_far_from_cache.get();
        assert_eq!(seen, 1);
    }
}