diff --git a/changelog.d/dd_sketch_averages_from_histograms.fix.md b/changelog.d/dd_sketch_averages_from_histograms.fix.md new file mode 100644 index 0000000000000..e409858602600 --- /dev/null +++ b/changelog.d/dd_sketch_averages_from_histograms.fix.md @@ -0,0 +1,3 @@ +Improved the accuracy of DDSketch averages converted from aggregated histograms by computing them from the histogram's own `sum` and `count`. + +authors: tony-resendiz diff --git a/lib/vector-core/src/metrics/ddsketch.rs b/lib/vector-core/src/metrics/ddsketch.rs index b66f25ceb150b..5fb910b39f369 100644 --- a/lib/vector-core/src/metrics/ddsketch.rs +++ b/lib/vector-core/src/metrics/ddsketch.rs @@ -779,6 +779,8 @@ impl AgentDDSketch { /// ## Errors /// /// Returns an error if a bucket size is greater that `u32::MAX`. + #[allow(clippy::cast_possible_truncation)] + #[allow(clippy::cast_precision_loss)] pub fn transform_to_sketch(mut metric: Metric) -> Result { let sketch = match metric.data_mut().value_mut() { MetricValue::Distribution { samples, .. } => { @@ -788,10 +790,24 @@ impl AgentDDSketch { } Some(sketch) } - MetricValue::AggregatedHistogram { buckets, .. } => { + MetricValue::AggregatedHistogram { + buckets, + sum, + count, + .. + } => { let delta_buckets = mem::take(buckets); let mut sketch = AgentDDSketch::with_agent_defaults(); sketch.insert_interpolate_buckets(delta_buckets)?; + + let orig_sum = *sum; + let orig_count = *count; + if orig_count > 0 { + sketch.sum = orig_sum; + sketch.count = orig_count as u32; + sketch.avg = orig_sum / orig_count as f64; + } + Some(sketch) } // We can't convert from any other metric value.