From cc5b23ff39be3a8053756d9dffd0058bcb5d47e7 Mon Sep 17 00:00:00 2001 From: Antrakos Date: Tue, 6 Jan 2026 19:43:14 +0100 Subject: [PATCH] Fix duplicate HELP/TYPE declarations in Prometheus exporter Fixes #4868 The Prometheus exporter was generating duplicate HELP and TYPE declarations for metrics with varying label sets, violating the Prometheus format specification and causing rejection by Prometheus Pushgateway with the error: "second HELP line for metric name." Changes: - Modified metric family ID to exclude label keys, using only metric name, description, and unit - Implemented two-pass processing: the first pass collects all unique label keys across all data points; the second pass builds label values, substituting empty strings for missing labels - Ensured a single metric family per metric type with consolidated label keys - Updated test expectations to verify a single HELP/TYPE declaration with proper empty-string handling for missing labels This aligns with the Go implementation's approach and ensures compatibility with Prometheus Pushgateway and other strict validators. 
--- CHANGELOG.md | 2 + .../exporter/prometheus/__init__.py | 192 +++++++++--------- .../tests/test_prometheus_exporter.py | 12 +- 3 files changed, 106 insertions(+), 100 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c37861aa3b5..09da4c6580d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +- `opentelemetry-exporter-prometheus`: Fix duplicate HELP/TYPE declarations for metrics with different label sets + ([#4868](https://github.com/open-telemetry/opentelemetry-python/issues/4868)) - Allow loading all resource detectors by setting `OTEL_EXPERIMENTAL_RESOURCE_DETECTORS` to `*` ([#4819](https://github.com/open-telemetry/opentelemetry-python/pull/4819)) - `opentelemetry-sdk`: Fix the type hint of the `_metrics_data` property to allow `None` diff --git a/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/__init__.py b/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/__init__.py index 475cfb1266e..fa89da4e71e 100644 --- a/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/__init__.py +++ b/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/__init__.py @@ -225,36 +225,23 @@ def _translate_to_prometheus( for metric in metrics: label_values_data_points = [] - label_keys_data_points = [] values = [] - per_metric_family_ids = [] - metric_name = sanitize_full_name(metric.name) metric_description = metric.description or "" metric_unit = map_unit(metric.unit) + # First pass: collect all unique label keys across all data points + all_label_keys_set = set() + data_point_attributes = [] for number_data_point in metric.data.data_points: - label_keys = [] - label_values = [] - - for key, value in sorted(number_data_point.attributes.items()): - label_keys.append(sanitize_attribute(key)) - label_values.append(self._check_value(value)) - - 
per_metric_family_ids.append( - "|".join( - [ - metric_name, - metric_description, - "%".join(label_keys), - metric_unit, - ] - ) - ) + attrs = {} + for key, value in number_data_point.attributes.items(): + sanitized_key = sanitize_attribute(key) + all_label_keys_set.add(sanitized_key) + attrs[sanitized_key] = self._check_value(value) + data_point_attributes.append(attrs) - label_values_data_points.append(label_values) - label_keys_data_points.append(label_keys) if isinstance(number_data_point, HistogramDataPoint): values.append( { @@ -268,87 +255,106 @@ def _translate_to_prometheus( else: values.append(number_data_point.value) - for per_metric_family_id, label_keys, label_values, value in zip( - per_metric_family_ids, - label_keys_data_points, - label_values_data_points, - values, - ): - is_non_monotonic_sum = ( - isinstance(metric.data, Sum) - and metric.data.is_monotonic is False - ) - is_cumulative = ( - isinstance(metric.data, Sum) - and metric.data.aggregation_temporality - == AggregationTemporality.CUMULATIVE - ) + # Sort label keys for consistent ordering + all_label_keys = sorted(all_label_keys_set) - # The prometheus compatibility spec for sums says: If the aggregation temporality is cumulative and the sum is non-monotonic, it MUST be converted to a Prometheus Gauge. 
- should_convert_sum_to_gauge = ( - is_non_monotonic_sum and is_cumulative - ) + # Second pass: build label values with empty strings for missing labels + for attrs in data_point_attributes: + label_values = [] + for key in all_label_keys: + label_values.append(attrs.get(key, "")) + label_values_data_points.append(label_values) - if ( - isinstance(metric.data, Sum) - and not should_convert_sum_to_gauge - ): - metric_family_id = "|".join( - [per_metric_family_id, CounterMetricFamily.__name__] - ) + # Create metric family ID without label keys + per_metric_family_id = "|".join( + [ + metric_name, + metric_description, + metric_unit, + ] + ) + + is_non_monotonic_sum = ( + isinstance(metric.data, Sum) + and metric.data.is_monotonic is False + ) + is_cumulative = ( + isinstance(metric.data, Sum) + and metric.data.aggregation_temporality + == AggregationTemporality.CUMULATIVE + ) + + # The prometheus compatibility spec for sums says: If the aggregation temporality is cumulative and the sum is non-monotonic, it MUST be converted to a Prometheus Gauge. 
+ should_convert_sum_to_gauge = ( + is_non_monotonic_sum and is_cumulative + ) + + if ( + isinstance(metric.data, Sum) + and not should_convert_sum_to_gauge + ): + metric_family_id = "|".join( + [per_metric_family_id, CounterMetricFamily.__name__] + ) - if metric_family_id not in metric_family_id_metric_family: - metric_family_id_metric_family[metric_family_id] = ( - CounterMetricFamily( - name=metric_name, - documentation=metric_description, - labels=label_keys, - unit=metric_unit, - ) + if metric_family_id not in metric_family_id_metric_family: + metric_family_id_metric_family[metric_family_id] = ( + CounterMetricFamily( + name=metric_name, + documentation=metric_description, + labels=all_label_keys, + unit=metric_unit, ) + ) + for label_values, value in zip( + label_values_data_points, values + ): metric_family_id_metric_family[ metric_family_id ].add_metric(labels=label_values, value=value) - elif ( - isinstance(metric.data, Gauge) - or should_convert_sum_to_gauge - ): - metric_family_id = "|".join( - [per_metric_family_id, GaugeMetricFamily.__name__] - ) + elif isinstance(metric.data, Gauge) or should_convert_sum_to_gauge: + metric_family_id = "|".join( + [per_metric_family_id, GaugeMetricFamily.__name__] + ) - if ( - metric_family_id - not in metric_family_id_metric_family.keys() - ): - metric_family_id_metric_family[metric_family_id] = ( - GaugeMetricFamily( - name=metric_name, - documentation=metric_description, - labels=label_keys, - unit=metric_unit, - ) + if ( + metric_family_id + not in metric_family_id_metric_family.keys() + ): + metric_family_id_metric_family[metric_family_id] = ( + GaugeMetricFamily( + name=metric_name, + documentation=metric_description, + labels=all_label_keys, + unit=metric_unit, ) + ) + for label_values, value in zip( + label_values_data_points, values + ): metric_family_id_metric_family[ metric_family_id ].add_metric(labels=label_values, value=value) - elif isinstance(metric.data, Histogram): - metric_family_id = "|".join( - 
[per_metric_family_id, HistogramMetricFamily.__name__] - ) + elif isinstance(metric.data, Histogram): + metric_family_id = "|".join( + [per_metric_family_id, HistogramMetricFamily.__name__] + ) - if ( - metric_family_id - not in metric_family_id_metric_family.keys() - ): - metric_family_id_metric_family[metric_family_id] = ( - HistogramMetricFamily( - name=metric_name, - documentation=metric_description, - labels=label_keys, - unit=metric_unit, - ) + if ( + metric_family_id + not in metric_family_id_metric_family.keys() + ): + metric_family_id_metric_family[metric_family_id] = ( + HistogramMetricFamily( + name=metric_name, + documentation=metric_description, + labels=all_label_keys, + unit=metric_unit, ) + ) + for label_values, value in zip( + label_values_data_points, values + ): metric_family_id_metric_family[ metric_family_id ].add_metric( @@ -358,10 +364,10 @@ def _translate_to_prometheus( ), sum_value=value["sum"], ) - else: - _logger.warning( - "Unsupported metric data. %s", type(metric.data) - ) + else: + _logger.warning( + "Unsupported metric data. 
%s", type(metric.data) + ) # pylint: disable=no-self-use def _check_value(self, value: Union[int, float, str, Sequence]) -> str: diff --git a/exporter/opentelemetry-exporter-prometheus/tests/test_prometheus_exporter.py b/exporter/opentelemetry-exporter-prometheus/tests/test_prometheus_exporter.py index a7a3868a8a0..d98c69cb860 100644 --- a/exporter/opentelemetry-exporter-prometheus/tests/test_prometheus_exporter.py +++ b/exporter/opentelemetry-exporter-prometheus/tests/test_prometheus_exporter.py @@ -684,13 +684,11 @@ def test_multiple_data_points_with_different_label_sets(self): http_server_request_duration_seconds_bucket{http_target="/foobar",le="+Inf",net_host_port="8080"} 6.0 http_server_request_duration_seconds_count{http_target="/foobar",net_host_port="8080"} 6.0 http_server_request_duration_seconds_sum{http_target="/foobar",net_host_port="8080"} 579.0 - # HELP http_server_request_duration_seconds test multiple label sets - # TYPE http_server_request_duration_seconds histogram - http_server_request_duration_seconds_bucket{le="123.0",net_host_port="8080"} 1.0 - http_server_request_duration_seconds_bucket{le="456.0",net_host_port="8080"} 4.0 - http_server_request_duration_seconds_bucket{le="+Inf",net_host_port="8080"} 7.0 - http_server_request_duration_seconds_count{net_host_port="8080"} 7.0 - http_server_request_duration_seconds_sum{net_host_port="8080"} 579.0 + http_server_request_duration_seconds_bucket{http_target="",le="123.0",net_host_port="8080"} 1.0 + http_server_request_duration_seconds_bucket{http_target="",le="456.0",net_host_port="8080"} 4.0 + http_server_request_duration_seconds_bucket{http_target="",le="+Inf",net_host_port="8080"} 7.0 + http_server_request_duration_seconds_count{http_target="",net_host_port="8080"} 7.0 + http_server_request_duration_seconds_sum{http_target="",net_host_port="8080"} 579.0 """ ), )