diff --git a/nodescraper/base/inbanddataplugin.py b/nodescraper/base/inbanddataplugin.py
index 12d53770..cfc7ec2e 100644
--- a/nodescraper/base/inbanddataplugin.py
+++ b/nodescraper/base/inbanddataplugin.py
@@ -23,17 +23,168 @@
 # SOFTWARE.
 #
 ###############################################################################
-from typing import Generic
+import json
+import os
+from pathlib import Path
+from typing import Any, Generic, Optional
 
 from nodescraper.connection.inband import InBandConnectionManager, SSHConnectionParams
 from nodescraper.generictypes import TAnalyzeArg, TCollectArg, TDataModel
 from nodescraper.interfaces import DataPlugin
+from nodescraper.models import DataModel
+from nodescraper.utils import pascal_to_snake
 
 
 class InBandDataPlugin(
     DataPlugin[InBandConnectionManager, SSHConnectionParams, TDataModel, TCollectArg, TAnalyzeArg],
     Generic[TDataModel, TCollectArg, TAnalyzeArg],
 ):
-    """Base class for in band plugins"""
+    """Base class for in band plugins.
+
+    Supports loading and comparing plugin data from scraper run directories
+    (e.g. for compare-runs). Subclasses get find_datamodel_path_in_run,
+    load_datamodel_from_path, get_extracted_errors, and load_run_data.
+    """
 
     CONNECTION_TYPE = InBandConnectionManager
+
+    @classmethod
+    def find_datamodel_path_in_run(cls, run_path: str) -> Optional[str]:
+        """Find this plugin's collector datamodel file under a scraper run directory.
+
+        Looks for <run_path>/<plugin_name_snake>/<collector_name_snake>/ with a result.json
+        whose parent matches this plugin, then a datamodel file (datamodel.json,
+        <datamodel_class_name>.json, or .log).
+
+        Args:
+            run_path: Path to a scraper log run directory (e.g. scraper_logs_*).
+
+        Returns:
+            Absolute path to the datamodel file, or None if not found.
+        """
+        run_path = os.path.abspath(run_path)
+        if not os.path.isdir(run_path):
+            return None
+        collector_cls = getattr(cls, "COLLECTOR", None)
+        data_model_cls = getattr(cls, "DATA_MODEL", None)
+        if not collector_cls or not data_model_cls:
+            return None
+        collector_dir = os.path.join(
+            run_path,
+            pascal_to_snake(cls.__name__),
+            pascal_to_snake(collector_cls.__name__),
+        )
+        if not os.path.isdir(collector_dir):
+            return None
+        result_path = os.path.join(collector_dir, "result.json")
+        if not os.path.isfile(result_path):
+            return None
+        try:
+            res_payload = json.loads(Path(result_path).read_text(encoding="utf-8"))
+            if res_payload.get("parent") != cls.__name__:
+                return None
+        except (json.JSONDecodeError, OSError):
+            return None
+        want_json = data_model_cls.__name__.lower() + ".json"
+        for fname in os.listdir(collector_dir):
+            low = fname.lower()
+            if low.endswith("datamodel.json") or low == want_json:
+                return os.path.join(collector_dir, fname)
+            if low.endswith(".log"):
+                return os.path.join(collector_dir, fname)
+        return None
+
+    @classmethod
+    def load_datamodel_from_path(cls, dm_path: str) -> Optional[TDataModel]:
+        """Load this plugin's DATA_MODEL from a file path (JSON or .log).
+
+        Args:
+            dm_path: Path to datamodel JSON or to a .log file (if DATA_MODEL
+                implements import_model for that format).
+
+        Returns:
+            Instance of DATA_MODEL or None if load fails.
+        """
+        dm_path = os.path.abspath(dm_path)
+        if not os.path.isfile(dm_path):
+            return None
+        data_model_cls = getattr(cls, "DATA_MODEL", None)
+        if not data_model_cls:
+            return None
+        try:
+            if dm_path.lower().endswith(".log"):
+                import_model = getattr(data_model_cls, "import_model", None)
+                if not callable(import_model):
+                    return None
+                base_import = getattr(DataModel.import_model, "__func__", DataModel.import_model)
+                if getattr(import_model, "__func__", import_model) is base_import:
+                    return None
+                return import_model(dm_path)
+            with open(dm_path, encoding="utf-8") as f:
+                data = json.load(f)
+            return data_model_cls.model_validate(data)
+        except Exception:
+            return None
+
+    @classmethod
+    def get_extracted_errors(cls, data_model: DataModel) -> Optional[list[str]]:
+        """Compute extracted errors from datamodel for compare-runs (in memory only).
+
+        Uses get_compare_content() on the datamodel and ANALYZER.get_error_matches
+        if this plugin has an ANALYZER; otherwise returns None.
+
+        Args:
+            data_model: Loaded DATA_MODEL instance.
+
+        Returns:
+            Sorted list of error match strings, or None if not applicable.
+        """
+        get_content = getattr(data_model, "get_compare_content", None)
+        if not callable(get_content):
+            return None
+        try:
+            content = get_content()
+        except Exception:
+            return None
+        if not isinstance(content, str):
+            return None
+        analyzer_cls = getattr(cls, "ANALYZER", None)
+        if not analyzer_cls:
+            return None
+        get_matches = getattr(analyzer_cls, "get_error_matches", None)
+        if not callable(get_matches):
+            return None
+        try:
+            matches = get_matches(content)
+            return sorted(matches) if matches is not None else None
+        except Exception:
+            return None
+
+    @classmethod
+    def load_run_data(cls, run_path: str) -> Optional[dict[str, Any]]:
+        """Load this plugin's run data from a scraper run directory for comparison.
+
+        Finds the datamodel file, loads it, and returns a JSON-serializable dict
+        (model_dump) with optional "extracted_errors" if the plugin supports
+        get_compare_content and ANALYZER.get_error_matches.
+
+        Args:
+            run_path: Path to a scraper log run directory or to a datamodel file.
+
+        Returns:
+            Dict suitable for diffing with another run, or None if not found.
+        """
+        run_path = os.path.abspath(run_path)
+        if not os.path.exists(run_path):
+            return None
+        dm_path = run_path if os.path.isfile(run_path) else cls.find_datamodel_path_in_run(run_path)
+        if not dm_path:
+            return None
+        data_model = cls.load_datamodel_from_path(dm_path)
+        if data_model is None:
+            return None
+        out = data_model.model_dump(mode="json")
+        extracted = cls.get_extracted_errors(data_model)
+        if extracted is not None:
+            out["extracted_errors"] = extracted
+        return out
diff --git a/nodescraper/base/regexanalyzer.py b/nodescraper/base/regexanalyzer.py
index 603d6c45..4103c99d 100644
--- a/nodescraper/base/regexanalyzer.py
+++ b/nodescraper/base/regexanalyzer.py
@@ -55,8 +55,31 @@ def count(self, val: int):
 class RegexAnalyzer(DataAnalyzer[TDataModel, TAnalyzeArg]):
     """Parent class for all regex based data analyzers."""
 
-    # Class variable for timestamp pattern - can be overridden in subclasses
     TIMESTAMP_PATTERN: re.Pattern = re.compile(r"(\d{4}-\d+-\d+T\d+:\d+:\d+,\d+[+-]\d+:\d+)")
+    ERROR_REGEX: list[ErrorRegex] = []
+
+    @classmethod
+    def get_error_matches(cls, content: str) -> set[str]:
+        """Extract all error match strings from content using the analyzer's ERROR_REGEX.
+
+        Args:
+            content: Raw log text.
+
+        Returns:
+            Set of normalized error match strings.
+ """ + matches: set[str] = set() + for error_regex_obj in getattr(cls, "ERROR_REGEX", []): + for match in error_regex_obj.regex.findall(content): + if isinstance(match, str) and "\n" in match: + normalized = match.strip() + elif isinstance(match, (tuple, list)): + normalized = "\n".join(m for m in match if m) + else: + normalized = str(match).strip() if match else "" + if normalized: + matches.add(normalized) + return matches def _extract_timestamp_from_match_position( self, content: str, match_start: int diff --git a/nodescraper/cli/cli.py b/nodescraper/cli/cli.py index 1b15afec..fe41cbab 100644 --- a/nodescraper/cli/cli.py +++ b/nodescraper/cli/cli.py @@ -33,6 +33,7 @@ from typing import Optional import nodescraper +from nodescraper.cli.compare_runs import run_compare_runs from nodescraper.cli.constants import DEFAULT_CONFIG, META_VAR_MAP from nodescraper.cli.dynamicparserbuilder import DynamicParserBuilder from nodescraper.cli.helper import ( @@ -224,6 +225,40 @@ def build_parser( help="Generate reference config from previous run logfiles. Writes to --output-path/reference_config.json if provided, otherwise ./reference_config.json.", ) + compare_runs_parser = subparsers.add_parser( + "compare-runs", + help="Compare datamodels from two run log directories", + ) + compare_runs_parser.add_argument( + "path1", + type=str, + help="Path to first run log directory", + ) + compare_runs_parser.add_argument( + "path2", + type=str, + help="Path to second run log directory", + ) + compare_runs_parser.add_argument( + "--skip-plugins", + nargs="*", + choices=list(plugin_reg.plugins.keys()), + metavar="PLUGIN", + help="Plugin names to exclude from comparison", + ) + compare_runs_parser.add_argument( + "--include-plugins", + nargs="*", + choices=list(plugin_reg.plugins.keys()), + metavar="PLUGIN", + help="If set, only compare data for these plugins (default: compare all found)", + ) + compare_runs_parser.add_argument( + "--dont-truncate", + action="store_true", + dest="dont_truncate", + help="Do not truncate the Message column; show full error text and all errors (not just first 3)", + ) config_builder_parser.add_argument( "--plugins", nargs="*", @@ -331,7 +366,11 @@ def main(arg_input: Optional[list[str]] = None): sname = system_info.name.lower().replace("-", "_").replace(".", "_") timestamp = datetime.datetime.now().strftime("%Y_%m_%d-%I_%M_%S_%p") - if parsed_args.log_path and parsed_args.subcmd not in ["gen-plugin-config", "describe"]: + if parsed_args.log_path and parsed_args.subcmd not in [ + "gen-plugin-config", + "describe", + "compare-runs", + ]: log_path = os.path.join( parsed_args.log_path, f"scraper_logs_{sname}_{timestamp}", @@ -358,6 +397,18 @@ def main(arg_input: Optional[list[str]] = None): if parsed_args.subcmd == "describe": parse_describe(parsed_args, plugin_reg, config_reg, logger) + if parsed_args.subcmd == "compare-runs": + run_compare_runs( + parsed_args.path1, + parsed_args.path2, + plugin_reg, + logger, + skip_plugins=getattr(parsed_args, "skip_plugins", None) or [], + include_plugins=getattr(parsed_args, "include_plugins", None), + truncate_message=not getattr(parsed_args, "dont_truncate", False), + ) + sys.exit(0) + if parsed_args.subcmd == "gen-plugin-config": if parsed_args.reference_config_from_logs: diff --git a/nodescraper/cli/compare_runs.py b/nodescraper/cli/compare_runs.py new file mode 100644 index 00000000..a41d8599 --- /dev/null +++ b/nodescraper/cli/compare_runs.py @@ -0,0 +1,496 @@ +############################################################################### 
+# +# MIT License +# +# Copyright (c) 2026 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +############################################################################### +import json +import logging +import re +import sys +from pathlib import Path +from typing import Any, Optional, Sequence + +from pydantic import ValidationError + +from nodescraper.cli.helper import find_datamodel_and_result +from nodescraper.enums import ExecutionStatus +from nodescraper.models import PluginResult, TaskResult +from nodescraper.models.datamodel import DataModel +from nodescraper.pluginregistry import PluginRegistry +from nodescraper.resultcollators.tablesummary import TableSummary + +# Default regex for log comparison when plugin has no get_error_matches +_DEFAULT_LOG_ERROR_PATTERN = re.compile( + r"^.*\b(error|fail|critical|crit|warning|warn|alert|emerg)\b.*$", + re.MULTILINE | re.IGNORECASE, +) + + +def _default_log_error_matches(content: str) -> set[str]: + """Extract lines matching default error keywords; used for log plugins without analyzer regexes.""" + return { + m.group(0).strip() + for m in _DEFAULT_LOG_ERROR_PATTERN.finditer(content) + if m.group(0).strip() + } + + +def _get_compare_content(data_model: DataModel) -> Optional[str]: + """Get log content from datamodel for compare-runs (get_compare_content() or None).""" + get_content = getattr(data_model, "get_compare_content", None) + if callable(get_content): + try: + out = get_content() + return out if isinstance(out, str) else None + except Exception: + return None + return None + + +def _get_error_matches_for_analyzer(analyzer_class: type, content: str) -> set[str]: + """Get error matches from analyzer.get_error_matches if present, else default keyword match.""" + get_matches = getattr(analyzer_class, "get_error_matches", None) + if callable(get_matches): + try: + return get_matches(content) + except Exception: + pass + return _default_log_error_matches(content) + + +def _compute_extracted_errors_if_applicable( + data_model: DataModel, + plugin_name: str, + analyzer_class: Optional[type], + logger: logging.Logger, +) -> Optional[list[str]]: + """If datamodel has get_compare_content, compute extracted errors in memory (not saved to model or disk).""" + content = _get_compare_content(data_model) + if content is None: + return None + try: + if analyzer_class is not None: + matches = _get_error_matches_for_analyzer(analyzer_class, content) + else: + matches = _default_log_error_matches(content) + return 
sorted(matches) + except Exception as e: + logger.warning( + "Could not compute extracted_errors for %s: %s", + plugin_name, + e, + ) + return None + + +def _load_plugin_data_from_run( + base_path: str, plugin_reg: PluginRegistry, logger: logging.Logger +) -> dict[str, dict[str, Any]]: + """Load plugin name -> datamodel (as dict) for all plugins found in a run log directory. + + Args: + base_path: Path to run log directory (e.g. scraper_logs_*). + plugin_reg: Plugin registry to resolve DATA_MODEL per plugin. + logger: Logger for warnings. + + Returns: + Dict mapping plugin name to the datamodel as a JSON-serializable dict (model_dump). + """ + result: dict[str, dict[str, Any]] = {} + found = find_datamodel_and_result(base_path, plugin_reg) + + for dm_path, res_path in found: + try: + res_payload = json.loads(Path(res_path).read_text(encoding="utf-8")) + task_res = TaskResult(**res_payload) + plugin_name = task_res.parent + except (json.JSONDecodeError, TypeError, OSError) as e: + logger.warning("Skipping %s: failed to load result: %s", res_path, e) + continue + + if plugin_name is None: + logger.warning("Result %s has no parent plugin name, skipping.", res_path) + continue + + plugin = plugin_reg.plugins.get(plugin_name) + if not plugin: + logger.warning("Plugin %s not found in registry, skipping.", plugin_name) + continue + + load_run_data = getattr(plugin, "load_run_data", None) + if callable(load_run_data) and plugin_name not in result: + try: + data = load_run_data(base_path) + if data: + result[plugin_name] = data + except Exception as e: + logger.warning("Plugin %s load_run_data failed: %s", plugin_name, e) + continue + + data_model_cls = getattr(plugin, "DATA_MODEL", None) + if data_model_cls is None: + logger.warning("Plugin %s has no DATA_MODEL, skipping.", plugin_name) + continue + + try: + if dm_path.lower().endswith(".log"): + import_model = getattr(data_model_cls, "import_model", None) + if not callable(import_model): + logger.warning( + "Plugin %s datamodel is .log but has no import_model, skipping.", + plugin_name, + ) + continue + + # skipping plugins where import_model is not implemented + _import_func = getattr(import_model, "__func__", import_model) + _base_import_func = getattr( + DataModel.import_model, "__func__", DataModel.import_model + ) + if _import_func is _base_import_func: + logger.debug( + "Skipping %s for plugin %s: .log file not loadable (no custom import_model).", + dm_path, + plugin_name, + ) + continue + data_model = import_model(dm_path) + else: + dm_payload = json.loads(Path(dm_path).read_text(encoding="utf-8")) + data_model = data_model_cls.model_validate(dm_payload) + analyzer_class = getattr(plugin, "ANALYZER", None) + result[plugin_name] = data_model.model_dump(mode="json") + extracted = _compute_extracted_errors_if_applicable( + data_model, plugin_name, analyzer_class, logger + ) + if extracted is not None: + result[plugin_name]["extracted_errors"] = extracted + except (json.JSONDecodeError, OSError) as e: + logger.warning("Skipping %s for plugin %s: %s", dm_path, plugin_name, e) + continue + except ValidationError as e: + logger.warning( + "Skipping datamodel for plugin %s (validation error): %s", + plugin_name, + e, + ) + continue + + return result + + +# Paths to exclude from "Other differences" (e.g. 
timestamps that differ per run) +_DIFF_EXCLUDE_PATHS = frozenset({"start_time", "end_time"}) + + +def _is_timestamp_path(path: str) -> bool: + """True if path is a timestamp field we should exclude from diff output.""" + if path in _DIFF_EXCLUDE_PATHS: + return True + for excl in _DIFF_EXCLUDE_PATHS: + if path.endswith("." + excl): + return True + return False + + +def _filter_timestamp_diffs( + diffs: list[tuple[str, Optional[Any], Optional[Any]]], +) -> list[tuple[str, Optional[Any], Optional[Any]]]: + """Remove timestamp-only differences from diff list.""" + return [(p, v1, v2) for p, v1, v2 in diffs if not _is_timestamp_path(p)] + + +def _diff_value(val1: Any, val2: Any, path: str) -> list[tuple[str, Optional[Any], Optional[Any]]]: + """Recursively diff two JSON-like values; return list of (path, value_run1, value_run2).""" + diffs: list[tuple[str, Optional[Any], Optional[Any]]] = [] + + if type(val1) is not type(val2): + diffs.append((path, val1, val2)) + return diffs + + if isinstance(val1, dict): + all_keys = set(val1) | set(val2) + for key in sorted(all_keys): + sub_path = f"{path}.{key}" if path else key + if key not in val1: + diffs.append((sub_path, None, val2[key])) + elif key not in val2: + diffs.append((sub_path, val1[key], None)) + else: + diffs.extend(_diff_value(val1[key], val2[key], sub_path)) + return diffs + + if isinstance(val1, list): + for i in range(max(len(val1), len(val2))): + sub_path = f"{path}[{i}]" + v1 = val1[i] if i < len(val1) else None + v2 = val2[i] if i < len(val2) else None + if i >= len(val1): + diffs.append((sub_path, None, v2)) + elif i >= len(val2): + diffs.append((sub_path, v1, None)) + else: + diffs.extend(_diff_value(v1, v2, sub_path)) + return diffs + + if val1 != val2: + diffs.append((path, val1, val2)) + return diffs + + +# Max length for a single value in the full-diff file (avoids dumping huge .log content). +_FULL_DIFF_VALUE_CAP = 8192 + + +def _format_value(val: Any, max_len: Optional[int] = 80) -> str: + """Format a value for display; optionally truncate long strings (max_len=None for no truncation).""" + if val is None: + return "" + s = json.dumps(val) if not isinstance(val, str) else repr(val) + if max_len is not None and len(s) > max_len: + s = s[: max_len - 3] + "..." + return s + + +def _format_value_for_diff_file(val: Any) -> str: + """Format a value for the diff file; cap very long strings (e.g. journal/dmesg logs).""" + if val is None: + return "" + s = repr(val) if isinstance(val, str) else json.dumps(val) + if len(s) > _FULL_DIFF_VALUE_CAP: + s = s[:_FULL_DIFF_VALUE_CAP] + f" ... 
[truncated, total {len(s)} characters]"
+    return s
+
+
+def _extracted_errors_compare(
+    data1: dict[str, Any], data2: dict[str, Any]
+) -> tuple[list[str], list[str]]:
+    """If datamodels have extracted_errors, return (errors_only_in_run1, errors_only_in_run2)."""
+    err1 = set(data1.get("extracted_errors") or [])
+    err2 = set(data2.get("extracted_errors") or [])
+    return (sorted(err1 - err2), sorted(err2 - err1))
+
+
+def _build_full_diff_report(
+    path1: str,
+    path2: str,
+    data1: dict[str, dict[str, Any]],
+    data2: dict[str, dict[str, Any]],
+    all_plugins: list[str],
+) -> str:
+    """Build a full diff report (no value truncation) for dumping to file."""
+    lines = [
+        "Compare-runs full diff report",
+        f"Run 1: {path1}",
+        f"Run 2: {path2}",
+        "",
+    ]
+    for plugin_name in all_plugins:
+        lines.append("=" * 80)
+        lines.append(f"Plugin: {plugin_name}")
+        lines.append("=" * 80)
+        if plugin_name not in data1:
+            lines.append(" Not present in run 1.")
+            lines.append("")
+            continue
+        if plugin_name not in data2:
+            lines.append(" Not present in run 2.")
+            lines.append("")
+            continue
+        d1, d2 = data1[plugin_name], data2[plugin_name]
+        has_extracted_errors = "extracted_errors" in d1 or "extracted_errors" in d2
+        if has_extracted_errors:
+            only_in_1, only_in_2 = _extracted_errors_compare(d1, d2)
+            lines.append(" --- Errors only in run 1 ---")
+            for e in only_in_1:
+                lines.append(f" {_format_value_for_diff_file(e)}")
+            lines.append("")
+            lines.append(" --- Errors only in run 2 ---")
+            for e in only_in_2:
+                lines.append(f" {_format_value_for_diff_file(e)}")
+            lines.append("")
+        diffs = _filter_timestamp_diffs(_diff_value(d1, d2, ""))
+        if not diffs:
+            if not has_extracted_errors:
+                lines.append(" No differences.")
+            lines.append("")
+            continue
+        if has_extracted_errors:
+            lines.append(
+                " (Other field differences below; see above for extracted_errors comparison.)"
+            )
+            lines.append("")
+        lines.append(f" {len(diffs)} difference(s):")
+        for p, v1, v2 in diffs:
+            lines.append(f" --- path: {p} ---")
+            lines.append(f" run1:\n{_format_value_for_diff_file(v1)}")
+            lines.append(f" run2:\n{_format_value_for_diff_file(v2)}")
+            lines.append("")
+        lines.append("")
+    return "\n".join(lines)
+
+
+def run_compare_runs(
+    path1: str,
+    path2: str,
+    plugin_reg: PluginRegistry,
+    logger: logging.Logger,
+    skip_plugins: Optional[Sequence[str]] = None,
+    include_plugins: Optional[Sequence[str]] = None,
+    output_path: Optional[str] = None,
+    truncate_message: bool = True,
+) -> None:
+    """Compare datamodels from two run log directories and log results.
+
+    For each plugin present in either run:
+    - If plugin is only in one run, logs that it was not found in the other run.
+    - If plugin is in both runs, computes diff and logs differences or 'no differences'.
+
+    Args:
+        path1: Path to first run log directory.
+        path2: Path to second run log directory.
+        plugin_reg: Plugin registry.
+        logger: Logger for output.
+        skip_plugins: Optional list of plugin names to exclude from comparison.
+        include_plugins: Optional list of plugin names to include; if set, only these are compared.
+        output_path: Optional path for the full diff report; default is <run1_name>_<run2_name>_diff.txt.
+        truncate_message: If True, truncate message text and show only the first 3 errors; if False, show full text and all.
+ """ + p1 = Path(path1) + p2 = Path(path2) + if not p1.exists(): + logger.error("Path not found: %s", path1) + sys.exit(1) + if not p1.is_dir(): + logger.error("Path is not a directory: %s", path1) + sys.exit(1) + if not p2.exists(): + logger.error("Path not found: %s", path2) + sys.exit(1) + if not p2.is_dir(): + logger.error("Path is not a directory: %s", path2) + sys.exit(1) + + logger.info("Loading run 1 from: %s", path1) + data1 = _load_plugin_data_from_run(path1, plugin_reg, logger) + logger.info("Loading run 2 from: %s", path2) + data2 = _load_plugin_data_from_run(path2, plugin_reg, logger) + + all_plugins = sorted(set(data1) | set(data2)) + if include_plugins is not None: + include_set = set(include_plugins) + all_plugins = [p for p in all_plugins if p in include_set] + logger.info("Including only plugins: %s", ", ".join(sorted(include_set))) + if skip_plugins: + skip_set = set(skip_plugins) + all_plugins = [p for p in all_plugins if p not in skip_set] + if skip_set: + logger.info("Skipping plugins: %s", ", ".join(sorted(skip_set))) + if not all_plugins: + logger.warning("No plugin data found in either run.") + return + + plugin_results: list[PluginResult] = [] + for plugin_name in all_plugins: + if plugin_name not in data1: + plugin_results.append( + PluginResult( + source=plugin_name, + status=ExecutionStatus.NOT_RAN, + message=f"Plugin not found in run 1 (path: {path1}).", + ) + ) + continue + if plugin_name not in data2: + plugin_results.append( + PluginResult( + source=plugin_name, + status=ExecutionStatus.NOT_RAN, + message=f"Plugin not found in run 2 (path: {path2}).", + ) + ) + continue + + d1, d2 = data1[plugin_name], data2[plugin_name] + diffs = _filter_timestamp_diffs(_diff_value(d1, d2, "")) + if "extracted_errors" in d1 or "extracted_errors" in d2: + only_in_1, only_in_2 = _extracted_errors_compare(d1, d2) + msg_lines = [ + f"Errors only in run 1: {len(only_in_1)}; only in run 2: {len(only_in_2)}.", + ] + msg_max_len = None if not truncate_message else 120 + err_slice = slice(None) if not truncate_message else slice(3) + if only_in_1 or only_in_2: + if only_in_1: + msg_lines.append( + " Run 1 only (first 3):" if truncate_message else " Run 1 only:" + ) + for e in only_in_1[err_slice]: + msg_lines.append(f" {_format_value(e, max_len=msg_max_len)}") + if only_in_2: + msg_lines.append( + " Run 2 only (first 3):" if truncate_message else " Run 2 only:" + ) + for e in only_in_2[err_slice]: + msg_lines.append(f" {_format_value(e, max_len=msg_max_len)}") + status = ExecutionStatus.WARNING if (only_in_1 or only_in_2) else ExecutionStatus.OK + plugin_results.append( + PluginResult( + source=plugin_name, + status=status, + message="\n".join(msg_lines), + ) + ) + elif not diffs: + plugin_results.append( + PluginResult( + source=plugin_name, + status=ExecutionStatus.OK, + message="No differences.", + ) + ) + else: + diff_max_len = None if not truncate_message else 80 + msg_lines = [f"{len(diffs)} difference(s):"] + for p, v1, v2 in diffs: + msg_lines.append( + f" {p}: run1={_format_value(v1, max_len=diff_max_len)} run2={_format_value(v2, max_len=diff_max_len)}" + ) + plugin_results.append( + PluginResult( + source=plugin_name, + status=ExecutionStatus.WARNING, + message="\n".join(msg_lines), + ) + ) + + out_file = output_path + if not out_file: + out_file = f"{Path(path1).name}_{Path(path2).name}_diff.txt" + full_report = _build_full_diff_report(path1, path2, data1, data2, all_plugins) + Path(out_file).write_text(full_report, encoding="utf-8") + logger.info("Full diff report 
written to: %s", out_file)
+
+    table_summary = TableSummary(logger=logger)
+    table_summary.collate_results(plugin_results=plugin_results, connection_results=[])
+    print(f"Diff file written to {out_file}")  # noqa: T201
diff --git a/nodescraper/cli/helper.py b/nodescraper/cli/helper.py
index dc3804b6..8647f4d3 100644
--- a/nodescraper/cli/helper.py
+++ b/nodescraper/cli/helper.py
@@ -418,15 +418,13 @@ def generate_reference_config_from_logs(
     Returns:
         PluginConfig: instance of plugin config
     """
-    found = find_datamodel_and_result(path)
+    found = find_datamodel_and_result(path, plugin_reg)
     plugin_config = PluginConfig()
     plugins = {}
     for dm, res in found:
         result_path = Path(res)
         res_payload = json.loads(result_path.read_text(encoding="utf-8"))
         task_res = TaskResult(**res_payload)
-        dm_path = Path(dm)
-        dm_payload = json.loads(dm_path.read_text(encoding="utf-8"))
         plugin = plugin_reg.plugins.get(task_res.parent)
         if not plugin:
             logger.warning(
@@ -435,7 +433,19 @@ def generate_reference_config_from_logs(
             )
             continue
 
-        data_model = plugin.DATA_MODEL.model_validate(dm_payload)
+        data_model_cls = getattr(plugin, "DATA_MODEL", None)
+        if not data_model_cls:
+            continue
+        dm_path = Path(dm)
+        if str(dm_path).lower().endswith(".log"):
+            import_model = getattr(data_model_cls, "import_model", None)
+            if callable(import_model):
+                data_model = import_model(str(dm_path))
+            else:
+                continue
+        else:
+            dm_payload = json.loads(dm_path.read_text(encoding="utf-8"))
+            data_model = data_model_cls.model_validate(dm_payload)
 
         args = extract_analyzer_args_from_model(plugin, data_model, logger)
         if not args:
@@ -447,30 +457,59 @@ def generate_reference_config_from_logs(
     return plugin_config
 
 
-def find_datamodel_and_result(base_path: str) -> list[Tuple[str, str]]:
+def find_datamodel_and_result(
+    base_path: str, plugin_reg: Optional[PluginRegistry] = None
+) -> list[Tuple[str, str]]:
     """Get datamodel and result files
 
     Args:
         base_path (str): location of previous run logs
+        plugin_reg (Optional[PluginRegistry]): if provided, also find datamodel files
+            named <datamodel_class_name>.json or any *.log in the collector dir
 
     Returns:
-        list[Tuple[str, str]]: tuple of datamodel and result json files
+        list[Tuple[str, str]]: list of (datamodel_path, result_path) tuples
     """
-    tuple_list: list[Tuple[str, str, str]] = []
+    tuple_list: list[Tuple[str, str]] = []
     for root, _, files in os.walk(base_path):
-        if "collector" in os.path.basename(root).lower():
-            datamodel_path = None
-            result_path = None
+        if "collector" not in os.path.basename(root).lower():
+            continue
+        result_name = next((f for f in files if f.lower() == "result.json"), None)
+        if not result_name:
+            continue
+        result_path = os.path.join(root, result_name)
+        datamodel_path = None
+        if plugin_reg:
+            try:
+                res_payload = json.loads(Path(result_path).read_text(encoding="utf-8"))
+                parent = res_payload.get("parent")
+                if parent:
+                    plugin = plugin_reg.plugins.get(parent)
+                    data_model_cls = getattr(plugin, "DATA_MODEL", None) if plugin else None
+                    if data_model_cls:
+                        want_json = data_model_cls.__name__.lower() + ".json"
+                        for fname in files:
+                            low = fname.lower()
+                            if low.endswith("datamodel.json") or low == want_json:
+                                datamodel_path = os.path.join(root, fname)
+                                break
+                    if not datamodel_path:
+                        for fname in files:
+                            if fname.lower().endswith(".log"):
+                                datamodel_path = os.path.join(root, fname)
+                                break
+            except (json.JSONDecodeError, OSError):
+                pass
+
+        if not datamodel_path:
             for fname in files:
-                low = fname.lower()
-                if low.endswith("datamodel.json"):
+                if fname.lower().endswith("datamodel.json"):
                     datamodel_path = 
os.path.join(root, fname) - elif low == "result.json": - result_path = os.path.join(root, fname) + break - if datamodel_path and result_path: - tuple_list.append((datamodel_path, result_path)) + if datamodel_path and result_path: + tuple_list.append((datamodel_path, result_path)) return tuple_list diff --git a/nodescraper/helpers/__init__.py b/nodescraper/helpers/__init__.py new file mode 100644 index 00000000..194f122b --- /dev/null +++ b/nodescraper/helpers/__init__.py @@ -0,0 +1,38 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +############################################################################### +from nodescraper.plugins.inband.dmesg import ( + compare_dmesg_runs, + compute_extracted_errors, + find_dmesg_datamodel_path, + load_dmesg_data, +) + +__all__ = [ + "compare_dmesg_runs", + "compute_extracted_errors", + "find_dmesg_datamodel_path", + "load_dmesg_data", +] diff --git a/nodescraper/pluginregistry.py b/nodescraper/pluginregistry.py index 6822d329..997fd67b 100644 --- a/nodescraper/pluginregistry.py +++ b/nodescraper/pluginregistry.py @@ -128,7 +128,7 @@ def load_plugins_from_entry_points() -> dict[str, type]: except TypeError: # Python 3.9 - entry_points() returns dict-like object all_eps = importlib.metadata.entry_points() # type: ignore[assignment] - eps = all_eps.get("nodescraper.plugins", []) # type: ignore[assignment, attr-defined] + eps = all_eps.get("nodescraper.plugins", []) # type: ignore[assignment, attr-defined, arg-type] for entry_point in eps: try: diff --git a/nodescraper/plugins/inband/dmesg/__init__.py b/nodescraper/plugins/inband/dmesg/__init__.py index 5be1de0a..b72ba802 100644 --- a/nodescraper/plugins/inband/dmesg/__init__.py +++ b/nodescraper/plugins/inband/dmesg/__init__.py @@ -24,5 +24,17 @@ # ############################################################################### from .dmesg_plugin import DmesgPlugin +from .run_compare import ( + compare_dmesg_runs, + compute_extracted_errors, + find_dmesg_datamodel_path, + load_dmesg_data, +) -__all__ = ["DmesgPlugin"] +__all__ = [ + "DmesgPlugin", + "compare_dmesg_runs", + "compute_extracted_errors", + "find_dmesg_datamodel_path", + "load_dmesg_data", +] diff --git a/nodescraper/plugins/inband/dmesg/dmesgdata.py b/nodescraper/plugins/inband/dmesg/dmesgdata.py index 26c7f9f3..47335650 100644 --- 
a/nodescraper/plugins/inband/dmesg/dmesgdata.py +++ b/nodescraper/plugins/inband/dmesg/dmesgdata.py @@ -68,6 +68,10 @@ def get_new_dmesg_lines(cls, current_dmesg: str, new_dmesg: str) -> str: return ("\n").join(new_lines) + def get_compare_content(self) -> str: + """Return the log content used for compare-runs extracted_errors (same as dmesg_content).""" + return self.dmesg_content + def merge_data(self, input_data: "DmesgData"): """Merge dmesg data with new input data diff --git a/nodescraper/plugins/inband/dmesg/run_compare.py b/nodescraper/plugins/inband/dmesg/run_compare.py new file mode 100644 index 00000000..9b75ce4a --- /dev/null +++ b/nodescraper/plugins/inband/dmesg/run_compare.py @@ -0,0 +1,95 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +############################################################################### +import logging +import os +from typing import Optional, Tuple + +from .dmesg_plugin import DmesgPlugin +from .dmesgdata import DmesgData + + +def find_dmesg_datamodel_path(run_path: str) -> Optional[str]: + """Find the DmesgPlugin collector datamodel under a scraper run directory. + + Delegates to DmesgPlugin.find_datamodel_path_in_run (see InBandDataPlugin). + """ + return DmesgPlugin.find_datamodel_path_in_run(run_path) + + +def load_dmesg_data(path: str) -> Tuple[Optional[DmesgData], Optional[str]]: + """Load DmesgData from a scraper run directory or a datamodel file path. + + Uses DmesgPlugin.load_datamodel_from_path and find_datamodel_path_in_run. + """ + path = os.path.abspath(path) + if not os.path.exists(path): + return None, None + dm_path = path if os.path.isfile(path) else DmesgPlugin.find_datamodel_path_in_run(path) + if not dm_path: + return None, None + dm = DmesgPlugin.load_datamodel_from_path(dm_path) + return (dm, dm_path) if dm is not None else (None, None) + + +def compute_extracted_errors(dm: DmesgData) -> list[str]: + """Apply DmesgPlugin analyzer regexes to dmesg content (in memory only). + + Delegates to DmesgPlugin.get_extracted_errors. + """ + out = DmesgPlugin.get_extracted_errors(dm) + return out if out is not None else [] + + +def compare_dmesg_runs( + path1: str, + path2: str, + logger: Optional[logging.Logger] = None, +) -> Tuple[list[str], list[str], str, str]: + """Load two DmesgPlugin runs, compute extracted errors in memory, and compare. 
+ + Uses DmesgPlugin.load_run_data; same logic is available for any InBandDataPlugin + via plugin.load_run_data and compare-runs. + """ + log = logger or logging.getLogger(__name__) + label1 = os.path.basename(path1.rstrip(os.sep)) + label2 = os.path.basename(path2.rstrip(os.sep)) + + d1 = DmesgPlugin.load_run_data(path1) + d2 = DmesgPlugin.load_run_data(path2) + + if d1 is None: + log.warning("No DmesgPlugin datamodel found at: %s", path1) + return [], [], label1, label2 + if d2 is None: + log.warning("No DmesgPlugin datamodel found at: %s", path2) + return [], [], label1, label2 + + err1 = set(d1.get("extracted_errors") or []) + err2 = set(d2.get("extracted_errors") or []) + only_in_1 = sorted(err1 - err2) + only_in_2 = sorted(err2 - err1) + + return only_in_1, only_in_2, label1, label2 diff --git a/nodescraper/plugins/inband/journal/journaldata.py b/nodescraper/plugins/inband/journal/journaldata.py index ca218b11..a6639422 100644 --- a/nodescraper/plugins/inband/journal/journaldata.py +++ b/nodescraper/plugins/inband/journal/journaldata.py @@ -140,3 +140,20 @@ def log_model(self, log_path: str): log_name = os.path.join(log_path, "journal.log") with open(log_name, "w", encoding="utf-8") as log_filename: log_filename.write(self.journal_log) + + @classmethod + def import_model(cls, model_input: Union[str, dict]) -> "JournalData": + """Load journal data from a file path or dict. + + Args: + model_input: Path to journal.log file, or dict of field values. + + Returns: + JournalData: Loaded journal data instance. + """ + if isinstance(model_input, dict): + return cls(**model_input) + if isinstance(model_input, str): + with open(model_input, "r", encoding="utf-8") as f: + return cls(journal_log=f.read(), journal_content_json=[]) + raise ValueError("Invalid input for journal data") diff --git a/test/unit/framework/fixtures/compare_runs_sysctl/run1/sysctl_plugin/sysctl_collector/result.json b/test/unit/framework/fixtures/compare_runs_sysctl/run1/sysctl_plugin/sysctl_collector/result.json new file mode 100644 index 00000000..042a8d56 --- /dev/null +++ b/test/unit/framework/fixtures/compare_runs_sysctl/run1/sysctl_plugin/sysctl_collector/result.json @@ -0,0 +1,8 @@ +{ + "status": "OK", + "message": "SYSCTL data collected", + "task": "SysctlCollector", + "parent": "SysctlPlugin", + "start_time": "2026-02-10T18:00:00.000000", + "end_time": "2026-02-10T18:00:01.000000" +} diff --git a/test/unit/framework/fixtures/compare_runs_sysctl/run1/sysctl_plugin/sysctl_collector/sysctldatamodel.json b/test/unit/framework/fixtures/compare_runs_sysctl/run1/sysctl_plugin/sysctl_collector/sysctldatamodel.json new file mode 100644 index 00000000..bd25b557 --- /dev/null +++ b/test/unit/framework/fixtures/compare_runs_sysctl/run1/sysctl_plugin/sysctl_collector/sysctldatamodel.json @@ -0,0 +1,13 @@ +{ + "vm_swappiness": 60, + "vm_numa_balancing": 0, + "vm_oom_kill_allocating_task": 0, + "vm_compaction_proactiveness": 0, + "vm_compact_unevictable_allowed": 1, + "vm_extfrag_threshold": 500, + "vm_zone_reclaim_mode": 0, + "vm_dirty_background_ratio": 10, + "vm_dirty_ratio": 20, + "vm_dirty_writeback_centisecs": 500, + "kernel_numa_balancing": 0 +} diff --git a/test/unit/framework/fixtures/compare_runs_sysctl/run2/sysctl_plugin/sysctl_collector/result.json b/test/unit/framework/fixtures/compare_runs_sysctl/run2/sysctl_plugin/sysctl_collector/result.json new file mode 100644 index 00000000..042a8d56 --- /dev/null +++ b/test/unit/framework/fixtures/compare_runs_sysctl/run2/sysctl_plugin/sysctl_collector/result.json @@ -0,0 
+1,8 @@ +{ + "status": "OK", + "message": "SYSCTL data collected", + "task": "SysctlCollector", + "parent": "SysctlPlugin", + "start_time": "2026-02-10T18:00:00.000000", + "end_time": "2026-02-10T18:00:01.000000" +} diff --git a/test/unit/framework/fixtures/compare_runs_sysctl/run2/sysctl_plugin/sysctl_collector/sysctldatamodel.json b/test/unit/framework/fixtures/compare_runs_sysctl/run2/sysctl_plugin/sysctl_collector/sysctldatamodel.json new file mode 100644 index 00000000..2e8df42f --- /dev/null +++ b/test/unit/framework/fixtures/compare_runs_sysctl/run2/sysctl_plugin/sysctl_collector/sysctldatamodel.json @@ -0,0 +1,13 @@ +{ + "vm_swappiness": 10, + "vm_numa_balancing": 0, + "vm_oom_kill_allocating_task": 0, + "vm_compaction_proactiveness": 0, + "vm_compact_unevictable_allowed": 1, + "vm_extfrag_threshold": 500, + "vm_zone_reclaim_mode": 0, + "vm_dirty_background_ratio": 5, + "vm_dirty_ratio": 15, + "vm_dirty_writeback_centisecs": 500, + "kernel_numa_balancing": 0 +} diff --git a/test/unit/framework/test_cli_helper.py b/test/unit/framework/test_cli_helper.py index 6c2c955f..5b88bf7e 100644 --- a/test/unit/framework/test_cli_helper.py +++ b/test/unit/framework/test_cli_helper.py @@ -52,6 +52,7 @@ from nodescraper.models import PluginConfig, TaskResult from nodescraper.models.datapluginresult import DataPluginResult from nodescraper.models.pluginresult import PluginResult +from nodescraper.pluginregistry import PluginRegistry def test_generate_reference_config(plugin_registry): @@ -161,6 +162,30 @@ def test_find_datamodel_and_result_with_fixture(framework_fixtures_path): assert rt.name == "result.json" +def test_find_datamodel_and_result_with_plugin_reg_finds_log(tmp_path): + """With plugin_reg, a collector dir with result.json and a .log file finds the .log.""" + collector = tmp_path / "collector" + collector.mkdir() + result_json = { + "status": "OK", + "message": "ok", + "task": "JournalCollector", + "parent": "JournalPlugin", + "start_time": "2025-01-01T00:00:00", + "end_time": "2025-01-01T00:00:01", + } + (collector / "result.json").write_text(json.dumps(result_json), encoding="utf-8") + (collector / "journal.log").write_text("some log line\n", encoding="utf-8") + + plugin_reg = PluginRegistry() + pairs = find_datamodel_and_result(str(tmp_path), plugin_reg) + + assert len(pairs) == 1 + dm_path, res_path = pairs[0] + assert Path(dm_path).name == "journal.log" + assert Path(res_path).name == "result.json" + + def test_generate_reference_config_from_logs(framework_fixtures_path): logger = logging.getLogger() res_payload = json.loads( diff --git a/test/unit/framework/test_compare_runs.py b/test/unit/framework/test_compare_runs.py new file mode 100644 index 00000000..6f6454ed --- /dev/null +++ b/test/unit/framework/test_compare_runs.py @@ -0,0 +1,170 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2026 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+###############################################################################
+import logging
+
+from nodescraper.cli.compare_runs import (
+    _diff_value,
+    _format_value,
+    run_compare_runs,
+)
+from nodescraper.pluginregistry import PluginRegistry
+
+
+def test_diff_value_empty_dicts():
+    """Same empty dicts produce no diffs."""
+    assert _diff_value({}, {}, "") == []
+
+
+def test_diff_value_identical_dicts():
+    """Identical dicts produce no diffs."""
+    d = {"a": 1, "b": "x"}
+    assert _diff_value(d, d, "") == []
+
+
+def test_diff_value_different_scalar():
+    """Different scalar values produce one diff."""
+    diffs = _diff_value(1, 2, "ver")
+    assert len(diffs) == 1
+    assert diffs[0] == ("ver", 1, 2)
+
+
+def test_diff_value_key_only_in_first():
+    """Key missing in second run is reported."""
+    diffs = _diff_value({"a": 1, "b": 2}, {"a": 1}, "")
+    assert len(diffs) == 1
+    assert diffs[0][0] == "b"
+    assert diffs[0][1] == 2
+    assert diffs[0][2] is None
+
+
+def test_diff_value_key_only_in_second():
+    """Key missing in first run is reported."""
+    diffs = _diff_value({"a": 1}, {"a": 1, "b": 2}, "")
+    assert len(diffs) == 1
+    assert diffs[0][0] == "b"
+    assert diffs[0][1] is None
+    assert diffs[0][2] == 2
+
+
+def test_diff_value_nested_dict():
+    """Nested dict differences are reported with path."""
+    d1 = {"a": {"x": 1}}
+    d2 = {"a": {"x": 2}}
+    diffs = _diff_value(d1, d2, "")
+    assert len(diffs) == 1
+    assert diffs[0] == ("a.x", 1, 2)
+
+
+def test_diff_value_list_same():
+    """Identical lists produce no diffs."""
+    assert _diff_value([1, 2], [1, 2], "list") == []
+
+
+def test_diff_value_list_different_length():
+    """List length difference is reported."""
+    diffs = _diff_value([1, 2], [1, 2, 3], "arr")
+    assert len(diffs) == 1
+    assert diffs[0][0] == "arr[2]"
+    assert diffs[0][1] is None
+    assert diffs[0][2] == 3
+
+
+def test_diff_value_list_different_element():
+    """Different list element is reported."""
+    diffs = _diff_value([1, 2], [1, 99], "arr")
+    assert len(diffs) == 1
+    assert diffs[0] == ("arr[1]", 2, 99)
+
+
+def test_format_value_none():
+    """None formats as the empty string."""
+    assert _format_value(None) == ""
+
+
+def test_format_value_short_string():
+    """Short value is unchanged."""
+    assert _format_value("ok") == "'ok'"
+
+
+def test_format_value_long_truncated():
+    """Long value is truncated."""
+    long_str = "x" * 100
+    out = _format_value(long_str, max_len=50)
+    assert len(out) == 50
+    assert out.endswith("...")
+
+
+def test_run_compare_runs_no_data(caplog, tmp_path):
+    """When neither run has plugin data, warning is logged."""
+    (tmp_path / "run1").mkdir()
+    (tmp_path / "run2").mkdir()
+    logger = logging.getLogger("test_compare_runs")
+    plugin_reg = PluginRegistry()
+    run_compare_runs(str(tmp_path / "run1"), str(tmp_path / "run2"), plugin_reg, logger)
+    assert "No plugin data found" in caplog.text
+
+
+def test_run_compare_runs_with_fixture_dirs(caplog, framework_fixtures_path):
+    """With two log dirs using fixture (same content), table is produced and no diffs."""
+    logger = 
logging.getLogger("test_compare_runs") + plugin_reg = PluginRegistry() + base = framework_fixtures_path / "log_dir" + run_compare_runs(str(base), str(base), plugin_reg, logger) + assert "Loading run 1" in caplog.text + assert "Loading run 2" in caplog.text + # Same dir twice: BiosPlugin should be found, no differences + assert "Plugin" in caplog.text + assert "No differences" in caplog.text or "difference" in caplog.text.lower() + + +def test_run_compare_runs_sysctl_fixture(caplog, framework_fixtures_path): + """Compare runs using compare_runs_sysctl fixtures; run1 and run2 differ (SysctlPlugin).""" + base = framework_fixtures_path / "compare_runs_sysctl" + run1 = base / "run1" + run2 = base / "run2" + logger = logging.getLogger("test_compare_runs") + plugin_reg = PluginRegistry() + run_compare_runs(str(run1), str(run2), plugin_reg, logger) + assert "Loading run 1" in caplog.text + assert "Loading run 2" in caplog.text + assert "Sysctl" in caplog.text or "sysctl" in caplog.text + # Fixtures have different vm_swappiness etc. so we expect differences + assert "difference" in caplog.text.lower() or "only in" in caplog.text.lower() + + +def test_run_compare_runs_one_run_missing_plugin(caplog, framework_fixtures_path, tmp_path): + """When a plugin exists only in run1, NOT_RAN message for run2 is in results.""" + logger = logging.getLogger("test_compare_runs") + plugin_reg = PluginRegistry() + run1 = framework_fixtures_path / "log_dir" + run2 = tmp_path / "run2" + run2.mkdir() + # run2 has no collector dirs + run_compare_runs(str(run1), str(run2), plugin_reg, logger) + assert "Loading run 1" in caplog.text + assert "Loading run 2" in caplog.text + # BiosPlugin in run1 only -> we should see "not found in run 2" + assert "not found in run 2" in caplog.text or "NOT_RAN" in caplog.text
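
Usage note (illustrative only, not part of the patch): the compare-runs flow
added in nodescraper/cli/compare_runs.py can also be driven programmatically.
A minimal sketch, assuming two existing scraper run directories (both paths
below are hypothetical):

    import logging

    from nodescraper.cli.compare_runs import run_compare_runs
    from nodescraper.pluginregistry import PluginRegistry

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("compare")

    # Diffs every plugin found in either run, writes the full report to
    # <run1_name>_<run2_name>_diff.txt, and logs a table summary.
    run_compare_runs(
        "scraper_logs_hostA_2026_02_10-06_00_00_PM",
        "scraper_logs_hostA_2026_02_11-06_00_00_PM",
        PluginRegistry(),
        logger,
        truncate_message=True,
    )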
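For a single plugin, the dmesg wrappers in
nodescraper/plugins/inband/dmesg/run_compare.py expose the same machinery; a
short sketch under the same assumption of two existing run directories:

    from nodescraper.plugins.inband.dmesg import compare_dmesg_runs

    # Returns errors unique to each run plus basename labels for reporting.
    only_in_1, only_in_2, label1, label2 = compare_dmesg_runs(
        "scraper_logs_hostA_2026_02_10-06_00_00_PM",
        "scraper_logs_hostA_2026_02_11-06_00_00_PM",
    )
    print(f"dmesg errors only in {label1}: {len(only_in_1)}")
    print(f"dmesg errors only in {label2}: {len(only_in_2)}")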