Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions config/pgwatch-prometheus/metrics.yml
Original file line number Diff line number Diff line change
Expand Up @@ -362,17 +362,21 @@ metrics:
This metric collects various PostgreSQL server settings and configurations.
It provides insights into the server's configuration, including version, memory settings, and other important parameters.
This metric is useful for monitoring server settings and ensuring optimal performance.
Note: For lock_timeout, we use reset_val instead of setting because pgwatch sets
lock_timeout to 100ms during metric collection, which would mask the actual configured value.
sqls:
11: |-
select /* pgwatch_generated */
(extract(epoch from now()) * 1e9)::int8 as epoch_ns,
current_database() as tag_datname,
name as tag_setting_name,
setting as tag_setting_value,
-- Use reset_val for lock_timeout because pgwatch sets it to 100ms during collection,
-- which would mask the actual configured value. reset_val shows the true server config.
case when name = 'lock_timeout' then reset_val else setting end as tag_setting_value,
unit as tag_unit,
category as tag_category,
vartype as tag_vartype,
case when setting ~ '^-?[0-9]+$' then setting::bigint else null end as numeric_value,
case when (case when name = 'lock_timeout' then reset_val else setting end) ~ '^-?[0-9]+$' then (case when name = 'lock_timeout' then reset_val else setting end)::bigint else null end as numeric_value,
case when source <> 'default' then 0 else 1 end as is_default,
1 as configured
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: is_default inconsistent with lock_timeout value

For lock_timeout, tag_setting_value now comes from reset_val while is_default is still derived from source (the session-overridden value). When pgwatch sets lock_timeout during collection, source becomes session, causing is_default to report non-default even if the configured reset_val equals the default, making the emitted fields contradictory.

Fix in Cursor Fix in Web

from pg_settings
Expand Down
Empty file added tests/settings/__init__.py
Empty file.
168 changes: 168 additions & 0 deletions tests/settings/test_settings_metric.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
"""
Tests for the settings metric, particularly the lock_timeout fix.

This module tests that the settings metric correctly uses reset_val for lock_timeout
instead of the session-level setting value, which would be affected by pgwatch's
lock_timeout override during metric collection.

See: https://gitlab.com/postgres-ai/postgres_ai/-/issues/61
"""

import os
from pathlib import Path

import pytest
import yaml


def get_metrics_yml_path() -> Path:
    """Return the absolute path of config/pgwatch-prometheus/metrics.yml.

    The repo root is located by walking three levels up from this test file
    (tests/settings/ -> tests/ -> repo root).
    """
    repo_root = Path(__file__).parents[2]
    return repo_root.joinpath("config", "pgwatch-prometheus", "metrics.yml")


@pytest.fixture(name="metrics_config")
def fixture_metrics_config() -> dict:
    """Parse metrics.yml once and expose it to tests as ``metrics_config``."""
    with get_metrics_yml_path().open("r", encoding="utf-8") as handle:
        return yaml.safe_load(handle)


@pytest.mark.unit
def test_settings_metric_exists(metrics_config: dict) -> None:
    """Verify metrics.yml defines a settings metric with SQL for PG 11+."""
    assert "metrics" in metrics_config, "metrics key should exist"
    metrics = metrics_config["metrics"]
    assert "settings" in metrics, "settings metric should be defined"
    assert "sqls" in metrics["settings"], "settings metric should have sqls"
    assert 11 in metrics["settings"]["sqls"], "settings metric should have SQL for version 11+"


@pytest.mark.unit
def test_settings_metric_uses_reset_val_for_lock_timeout(metrics_config: dict) -> None:
    """
    Verify the settings SQL reads reset_val for lock_timeout.

    pgwatch overrides lock_timeout (to 100ms) for the duration of metric
    collection; reading the plain 'setting' column would therefore report
    pgwatch's session value instead of the server's configured one.

    See: https://gitlab.com/postgres-ai/postgres_ai/-/issues/61
    """
    sql = metrics_config["metrics"]["settings"]["sqls"][11]

    # Map of required substring -> failure message; checked in insertion
    # order, from the loose sanity checks to the exact CASE expression.
    expectations = {
        "reset_val": "SQL should reference reset_val column",
        "lock_timeout": "SQL should reference lock_timeout",
        "case when name = 'lock_timeout' then reset_val else setting end":
            "SQL should use CASE statement to select reset_val for lock_timeout",
    }
    for needle, message in expectations.items():
        assert needle in sql, message


@pytest.mark.unit
def test_settings_metric_description_documents_lock_timeout_fix(metrics_config: dict) -> None:
    """Check the metric description calls out the lock_timeout/reset_val workaround."""
    text = metrics_config["metrics"]["settings"]["description"].lower()
    assert "lock_timeout" in text, \
        "Description should mention lock_timeout"
    assert "reset_val" in text, \
        "Description should mention reset_val"


@pytest.mark.unit
def test_settings_metric_has_required_fields(metrics_config: dict) -> None:
    """Every output column the metric consumers rely on must appear in the SQL."""
    sql = metrics_config["metrics"]["settings"]["sqls"][11]

    # Columns emitted by the PG 11+ query, in select-list order.
    for column in (
        "epoch_ns",
        "tag_datname",
        "tag_setting_name",
        "tag_setting_value",
        "tag_unit",
        "tag_category",
        "tag_vartype",
        "numeric_value",
        "is_default",
        "configured",
    ):
        assert column in sql, f"SQL should include {column}"


@pytest.mark.unit
def test_settings_metric_numeric_value_uses_correct_source_for_lock_timeout(metrics_config: dict) -> None:
    """
    Verify numeric_value is derived from the same CASE expression as
    tag_setting_value, so both fields report reset_val for lock_timeout.
    """
    sql = metrics_config["metrics"]["settings"]["sqls"][11]

    # The CASE expression must show up for tag_setting_value and again
    # (possibly more than once) inside the numeric_value computation.
    case_expr = "case when name = 'lock_timeout' then reset_val else setting end"
    hits = sql.count(case_expr)
    assert hits >= 2, \
        f"CASE expression for lock_timeout should appear at least twice (for tag_setting_value and numeric_value), found {hits}"


@pytest.mark.integration
@pytest.mark.requires_postgres
def test_settings_metric_lock_timeout_returns_actual_value() -> None:
    """
    Integration test: verify lock_timeout returns the actual configured value.

    Requires a running PostgreSQL database (TARGET_DB_URL, with a local
    default). Simulates pgwatch's session-level lock_timeout override and
    checks that the metric's CASE expression still reports the configured
    (reset) value.

    The session override is chosen to differ from the configured reset_val;
    with a fixed '100ms' override the test would pass vacuously whenever the
    server's lock_timeout is already 100ms, since setting, reset_val and the
    CASE result would all coincide.
    """
    try:
        import psycopg
    except ImportError:
        pytest.skip("psycopg not available")

    target_db_url = os.getenv(
        "TARGET_DB_URL",
        "postgresql://postgres:postgres@localhost:55432/target_database"
    )

    try:
        conn = psycopg.connect(target_db_url)
    except Exception as e:
        pytest.skip(f"Could not connect to PostgreSQL: {e}")

    try:
        with conn.cursor() as cur:
            # First, get the actual configured lock_timeout (reset_val is in ms).
            cur.execute("SELECT reset_val FROM pg_settings WHERE name = 'lock_timeout'")
            actual_value = cur.fetchone()[0]

            # Simulate what pgwatch does, but pick an override guaranteed to
            # differ from the configured value so the test can distinguish
            # 'setting' from 'reset_val'.
            override_ms = "200" if actual_value == "100" else "100"
            # override_ms is an internally chosen constant, not user input,
            # so interpolating it into SET is safe here.
            cur.execute(f"SET lock_timeout = '{override_ms}ms'")

            # Query using the metric's fixed SQL logic.
            cur.execute("""
                SELECT
                    setting as raw_setting,
                    reset_val,
                    CASE WHEN name = 'lock_timeout' THEN reset_val ELSE setting END as our_value
                FROM pg_settings
                WHERE name = 'lock_timeout'
            """)
            raw_setting, reset_val, our_value = cur.fetchone()

            # The session override must be visible in 'setting'...
            assert raw_setting == override_ms, \
                f"Session setting should be {override_ms}ms, got {raw_setting}"
            assert raw_setting != actual_value, \
                "Session override must differ from the configured value for this test to be meaningful"
            # ...while the metric expression reports the configured value.
            assert our_value == actual_value, \
                f"Our query should return reset_val ({actual_value}), not session value ({raw_setting})"
            assert our_value == reset_val, "our_value should equal reset_val"

    finally:
        conn.close()