/// Label names that are rejected as default metric labels.
///
/// Matching against this list is exact and case-sensitive. The entries all
/// name per-object identifiers (presumably high-cardinality values; confirm
/// with the metrics owners before adding or removing an entry).
pub const FORBIDDEN_DEFAULT_METRIC_LABELS: &[&str] = &[
    // Lease and tracing identifiers.
    "lease_id",
    "trace_id",
    "span_id",
    // Tenant identifiers.
    "tenant_id",
    "tenant_name",
    // Workload identity.
    "pod_uid",
    "pod_name",
    "service_account",
    // Resource, operation and node identifiers.
    "logical_resource_name",
    "operation_hash",
    "node_id",
];
26
/// Metric family names that form the stable metric contract.
///
/// Entry order is part of the value: callers that index or iterate this
/// slice observe the order written here, so append rather than reorder.
pub const STABLE_METRIC_FAMILIES: &[&str] = &[
    // grafos_leases_*
    "grafos_leases_active",
    "grafos_leases_total",
    "grafos_leases_expired_total",
    "grafos_leases_fenced_total",
    "grafos_leases_revoked_total",
    // grafos_ops_* and grafos_bytes_*
    "grafos_ops_total",
    "grafos_ops_errors_total",
    "grafos_bytes_read_total",
    "grafos_bytes_written_total",
    // grafos_*_latency_us
    "grafos_op_latency_us",
    "grafos_bind_latency_us",
    "grafos_renew_latency_us",
    "grafos_revoke_latency_us",
    "grafos_teardown_latency_us",
    // Authentication, replay and token families.
    "grafos_auth_failures_total",
    "grafos_replay_rejections_total",
    "grafos_token_validations_total",
    "grafos_token_failures_total",
    "grafos_stale_access_rejections_total",
    // grafos_tasklet_*
    "grafos_tasklet_submits_total",
    "grafos_tasklet_completions_total",
    "grafos_tasklet_failures_total",
    "grafos_tasklet_submit_latency_us",
    "grafos_tasklet_exec_latency_us",
    "grafos_tasklet_duration_us",
];
56
/// Metric family names that belong to the phase 219 observability contract.
///
/// Entry order is preserved for callers that iterate or index the slice.
pub const PHASE_219_METRIC_FAMILIES: &[&str] = &[
    // grafos_lease_operation*
    "grafos_lease_operations_total",
    "grafos_lease_operation_latency_us",
    // grafos_scheduler_*
    "grafos_scheduler_admissions_total",
    "grafos_scheduler_admission_latency_us",
    "grafos_scheduler_pending_admissions",
    "grafos_scheduler_preemptions_total",
    "grafos_scheduler_preempted_capacity_bytes_total",
    "grafos_scheduler_placement_candidates_total",
    "grafos_scheduler_placement_latency_us",
    // grafos_resource_*
    "grafos_resource_capacity_bytes",
    "grafos_resource_used_bytes",
    "grafos_resource_pressure_ratio",
    // Failure-domain and replica families.
    "grafos_failure_domain_health",
    "grafos_replica_lag_records",
    // grafos_economics_*
    "grafos_economics_generation_total",
    "grafos_economics_cap_rejections_total",
];
76
/// Span names that belong to the phase 219 observability contract.
///
/// Names use dotted segments under either the `grafos.` or the
/// `fabricbios.` prefix; entry order is preserved for callers.
pub const PHASE_219_SPAN_NAMES: &[&str] = &[
    // grafos.api.* and grafos.scheduler.*
    "grafos.api.request",
    "grafos.scheduler.admission",
    "grafos.scheduler.placement",
    "grafos.scheduler.preemption",
    "grafos.scheduler.lease_request",
    // fabricbios.control.*
    "fabricbios.control.lease_alloc",
    "fabricbios.control.lease_renew",
    "fabricbios.control.lease_revoke",
    "fabricbios.control.capability_issue",
    // fabricbios.data.*
    "fabricbios.data.fbmu",
    "fabricbios.data.fbbu",
    "fabricbios.data.gpu_session",
    // grafos.runtime.*, grafos.audit.*, grafos.economics.*
    "grafos.runtime.capability_cache",
    "grafos.audit.emit",
    "grafos.economics.publish_generation",
];
95
/// Event names that belong to the phase 219 observability contract.
///
/// Entry order is preserved for callers that iterate or index the slice.
pub const PHASE_219_EVENT_NAMES: &[&str] = &[
    // lease_* and teardown events.
    "lease_acquired",
    "lease_dropped",
    "lease_expired",
    "lease_revoked",
    "lease_fenced",
    "teardown_failed",
    // op_* events.
    "op_completed",
    "op_failed",
    // Admission, placement and preemption events.
    "admission_approved",
    "admission_denied",
    "placement_decision",
    "preemption_triggered",
    "cross_state_disagreement_resolved",
    // Authentication, replay and token events.
    "auth_failed",
    "replay_rejected",
    "token_validation_failed",
    "token_minted",
    // scheduler_* events.
    "scheduler_election_lost",
    "scheduler_promotion_failed",
    "scheduler_stale_leader_detected",
    "scheduler_promoted",
];
120
121pub fn is_forbidden_default_metric_label(label: &str) -> bool {
124 FORBIDDEN_DEFAULT_METRIC_LABELS.contains(&label)
125}
126
#[cfg(test)]
mod tests {
    use super::*;

    // Panics, naming the first entry of `needles` (in order) that is absent
    // from `haystack`; returns normally when every needle is present.
    fn assert_contains_all(haystack: &[&str], needles: &[&str]) {
        let first_missing = needles
            .iter()
            .find(|candidate| !haystack.contains(*candidate));
        if let Some(needle) = first_missing {
            panic!("observability contract is missing {needle}");
        }
    }

    // A subset of the phase 219 metric families that must stay listed.
    #[test]
    fn phase_219_metric_contract_contains_required_families() {
        let required = [
            "grafos_lease_operations_total",
            "grafos_scheduler_admissions_total",
            "grafos_scheduler_preemptions_total",
            "grafos_scheduler_placement_candidates_total",
            "grafos_resource_pressure_ratio",
            "grafos_failure_domain_health",
            "grafos_economics_generation_total",
        ];
        assert_contains_all(PHASE_219_METRIC_FAMILIES, &required);
    }

    // A subset of the stable metric families that must stay listed.
    #[test]
    fn stable_metric_contract_contains_existing_exported_families() {
        let required = [
            "grafos_leases_active",
            "grafos_leases_total",
            "grafos_leases_revoked_total",
            "grafos_revoke_latency_us",
            "grafos_tasklet_duration_us",
        ];
        assert_contains_all(STABLE_METRIC_FAMILIES, &required);
    }

    // Each identifier below must be reported as forbidden by the lookup.
    #[test]
    fn forbidden_metric_labels_cover_high_cardinality_identifiers() {
        let identifiers = [
            "lease_id",
            "trace_id",
            "tenant_id",
            "tenant_name",
            "pod_uid",
            "logical_resource_name",
            "operation_hash",
            "node_id",
        ];
        identifiers.iter().copied().for_each(|label| {
            assert!(
                is_forbidden_default_metric_label(label),
                "{label} must stay out of default production metric labels"
            );
        });
    }

    // Span names from the API request through to audit emission.
    #[test]
    fn phase_219_span_contract_contains_admission_to_lease_path() {
        let required = [
            "grafos.api.request",
            "grafos.scheduler.admission",
            "grafos.scheduler.placement",
            "grafos.scheduler.lease_request",
            "fabricbios.control.lease_alloc",
            "fabricbios.control.capability_issue",
            "grafos.audit.emit",
        ];
        assert_contains_all(PHASE_219_SPAN_NAMES, &required);
    }

    // Lease lifecycle and scheduler decision events that must stay listed.
    #[test]
    fn phase_219_event_contract_contains_lifecycle_and_decision_events() {
        let required = [
            "lease_acquired",
            "lease_revoked",
            "lease_fenced",
            "admission_denied",
            "placement_decision",
            "preemption_triggered",
            "cross_state_disagreement_resolved",
            "scheduler_promoted",
        ];
        assert_contains_all(PHASE_219_EVENT_NAMES, &required);
    }
}