From 2d8bd79e0c43181cc13c4a9781b451164226031e Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Sat, 31 Jan 2026 15:21:16 -0500 Subject: [PATCH 01/26] Refactor PhabricatorPatch to use async HTTP requests Converted file retrieval methods in PhabricatorPatch and related interfaces to async, replacing synchronous HTTP calls with httpx.AsyncClient. --- bugbug/tools/core/platforms/phabricator.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bugbug/tools/core/platforms/phabricator.py b/bugbug/tools/core/platforms/phabricator.py index 7bd5f29dd2..7a01099978 100644 --- a/bugbug/tools/core/platforms/phabricator.py +++ b/bugbug/tools/core/platforms/phabricator.py @@ -53,6 +53,12 @@ def get_phabricator_client( # This is awkward since PhabricatorAPI does not accept user agent directly set_default_value("User-Agent", "name", user_agent) + if not user_agent: + user_agent = get_user_agent() + + # This is awkward since PhabricatorAPI does not accept user agent directly + set_default_value("User-Agent", "name", user_agent) + return PhabricatorAPI(api_key, url) From 76b4fecf37822e6522009263d5bf78c440567ba6 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Tue, 27 Jan 2026 19:10:43 +0100 Subject: [PATCH 02/26] Extend Bugzilla trusted user policy Trusted users are now: - Mozilla Corporation employees, OR - Users with editbugs privilege who have been active within last year This prevents dormant accounts with editbugs from being considered trusted. --- bugbug/tools/core/platforms/bugzilla.py | 41 ++++++- tests/test_bugzilla_trusted_filtering.py | 149 +++++++++++++++++++++++ 2 files changed, 186 insertions(+), 4 deletions(-) diff --git a/bugbug/tools/core/platforms/bugzilla.py b/bugbug/tools/core/platforms/bugzilla.py index ee89283434..60ab861b7a 100644 --- a/bugbug/tools/core/platforms/bugzilla.py +++ b/bugbug/tools/core/platforms/bugzilla.py @@ -7,12 +7,15 @@ import logging import os +from datetime import datetime, timedelta, timezone from libmozdata.bugzilla import Bugzilla, BugzillaBase logger = logging.getLogger(__name__) MOZILLA_CORP_GROUP_ID = 42 +EDITBUGS_GROUP_ID = 9 +EDITBUGS_CUTOFF_DAYS = 365 BugzillaBase.TOKEN = os.getenv("BUGZILLA_TOKEN") @@ -24,7 +27,9 @@ def _check_users_batch(emails: list[str]) -> dict[str, bool]: emails: List of email addresses to check Returns: - Dictionary mapping email to trusted status (MOCO group only) + Dictionary mapping email to trusted status based on: + - Mozilla Corporation group membership, OR + - editbugs group membership AND activity within last year Raises: ValueError: If Bugzilla token is not available @@ -51,8 +56,36 @@ def user_handler(user, data): return groups = user.get("groups", []) - # Check for mozilla-corporation group (ID 42) only - is_trusted = any(g.get("id") == MOZILLA_CORP_GROUP_ID for g in groups) + group_ids = {g.get("id") for g in groups} + + # Check if user is in mozilla-corporation group + is_moco = MOZILLA_CORP_GROUP_ID in group_ids + + # Check if user has editbugs and has been seen recently + has_editbugs = EDITBUGS_GROUP_ID in group_ids + last_seen_date = user.get("last_seen_date") + is_recently_active = False + + if last_seen_date: + try: + last_seen = datetime.fromisoformat( + last_seen_date.replace("Z", "+00:00") + ) + one_year_ago = datetime.now(timezone.utc) - timedelta( + days=EDITBUGS_CUTOFF_DAYS + ) + is_recently_active = last_seen > one_year_ago + + if has_editbugs and not is_recently_active: + days_since_seen = (datetime.now(timezone.utc) - last_seen).days + logger.warning( + f"User {email} has editbugs but hasn't been seen in {days_since_seen} days" + ) + except (ValueError, TypeError) as e: + logger.warning(f"Failed to parse last_seen_date for {email}: {e}") + + # Trusted if: MOCO employee OR (has editbugs AND active within last year) + is_trusted = is_moco or (has_editbugs and is_recently_active) data[email] = is_trusted def fault_user_handler(user, data): @@ -62,7 +95,7 @@ def fault_user_handler(user, data): user_data: dict[str, bool] = {} BugzillaUser( user_names=emails, - include_fields=["name", "groups"], + include_fields=["name", "groups", "last_seen_date"], user_handler=user_handler, fault_user_handler=fault_user_handler, user_data=user_data, diff --git a/tests/test_bugzilla_trusted_filtering.py b/tests/test_bugzilla_trusted_filtering.py index a8395629c3..49fbf9dc22 100644 --- a/tests/test_bugzilla_trusted_filtering.py +++ b/tests/test_bugzilla_trusted_filtering.py @@ -656,3 +656,152 @@ def test_trusted_comment_validates_before_untrusted_history_after(): # Untrusted history AFTER trusted comment should be filtered assert filtered_history == 1 assert "[Filtered]" in timeline_text + + +@responses.activate +def test_editbugs_with_recent_activity_is_trusted(): + """Test that users with editbugs and recent activity are trusted.""" + from datetime import datetime, timezone + + from libmozdata.bugzilla import BugzillaBase + + old_token = BugzillaBase.TOKEN + try: + bugzilla_module.set_token("test_token") + + # User with editbugs who was seen recently (today) + recent_date = datetime.now(timezone.utc).isoformat() + responses.add( + responses.GET, + "https://bugzilla.mozilla.org/rest/user", + json={ + "users": [ + { + "name": "editbugs@example.com", + "groups": [ + {"id": 9, "name": "editbugs"}, + {"id": 69, "name": "everyone"}, + ], + "last_seen_date": recent_date, + } + ], + "faults": [], + }, + status=200, + ) + + results = _check_users_batch(["editbugs@example.com"]) + assert results["editbugs@example.com"] is True + + finally: + BugzillaBase.TOKEN = old_token + + +@responses.activate +def test_editbugs_with_stale_activity_is_untrusted(): + """Test that users with editbugs but stale activity (>365 days) are not trusted.""" + from libmozdata.bugzilla import BugzillaBase + + old_token = BugzillaBase.TOKEN + try: + bugzilla_module.set_token("test_token") + + # User with editbugs who was last seen over a year ago + stale_date = "2022-01-01T00:00:00Z" + responses.add( + responses.GET, + "https://bugzilla.mozilla.org/rest/user", + json={ + "users": [ + { + "name": "stale@example.com", + "groups": [ + {"id": 9, "name": "editbugs"}, + {"id": 69, "name": "everyone"}, + ], + "last_seen_date": stale_date, + } + ], + "faults": [], + }, + status=200, + ) + + results = _check_users_batch(["stale@example.com"]) + assert results["stale@example.com"] is False + + finally: + BugzillaBase.TOKEN = old_token + + +@responses.activate +def test_moco_without_recent_activity_is_trusted(): + """Test that MOCO users are trusted regardless of activity date.""" + from libmozdata.bugzilla import BugzillaBase + + old_token = BugzillaBase.TOKEN + try: + bugzilla_module.set_token("test_token") + + # MOCO user with stale activity is still trusted + stale_date = "2022-01-01T00:00:00Z" + responses.add( + responses.GET, + "https://bugzilla.mozilla.org/rest/user", + json={ + "users": [ + { + "name": "moco@mozilla.com", + "groups": [ + {"id": 42, "name": "mozilla-corporation"}, + {"id": 69, "name": "everyone"}, + ], + "last_seen_date": stale_date, + } + ], + "faults": [], + }, + status=200, + ) + + results = _check_users_batch(["moco@mozilla.com"]) + assert results["moco@mozilla.com"] is True + + finally: + BugzillaBase.TOKEN = old_token + + +@responses.activate +def test_editbugs_without_last_seen_is_untrusted(): + """Test that editbugs users without last_seen_date are not trusted.""" + from libmozdata.bugzilla import BugzillaBase + + old_token = BugzillaBase.TOKEN + try: + bugzilla_module.set_token("test_token") + + # User with editbugs but no last_seen_date field + responses.add( + responses.GET, + "https://bugzilla.mozilla.org/rest/user", + json={ + "users": [ + { + "name": "no_activity@example.com", + "groups": [ + {"id": 9, "name": "editbugs"}, + {"id": 69, "name": "everyone"}, + ], + # No last_seen_date field + } + ], + "faults": [], + }, + status=200, + ) + + results = _check_users_batch(["no_activity@example.com"]) + assert results["no_activity@example.com"] is False + + finally: + BugzillaBase.TOKEN = old_token From 1b1edbfbb3134af296014032e0d4bf1225403d35 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Tue, 27 Jan 2026 19:11:25 +0100 Subject: [PATCH 03/26] Redact untrusted Bugzilla metadata Refactor metadata filtering into a SanitizedBug class that inherits from Bug and overrides properties to return sanitized values when no trusted user has commented. When there's no trusted comment, the following are redacted: - Bug title/summary - Reporter name and email - Assignee name and email The trust check uses cached_property to avoid redundant API calls. Bug.get() now returns SanitizedBug by default for security. --- bugbug/tools/core/platforms/bugzilla.py | 298 +++++++++++++++++------ tests/test_bugzilla_trusted_filtering.py | 241 ++++++++++-------- 2 files changed, 375 insertions(+), 164 deletions(-) diff --git a/bugbug/tools/core/platforms/bugzilla.py b/bugbug/tools/core/platforms/bugzilla.py index 60ab861b7a..a39c3df17f 100644 --- a/bugbug/tools/core/platforms/bugzilla.py +++ b/bugbug/tools/core/platforms/bugzilla.py @@ -8,6 +8,7 @@ import logging import os from datetime import datetime, timedelta, timezone +from functools import cached_property from libmozdata.bugzilla import Bugzilla, BugzillaBase @@ -17,6 +18,10 @@ EDITBUGS_GROUP_ID = 9 EDITBUGS_CUTOFF_DAYS = 365 +REDACTED_TITLE = "[Unvalidated bug title redacted for security]" +REDACTED_REPORTER = "- **Reporter**: [Redacted]" +REDACTED_ASSIGNEE = "- **Assignee**: [Redacted]" + BugzillaBase.TOKEN = os.getenv("BUGZILLA_TOKEN") @@ -279,89 +284,63 @@ def create_bug_timeline(comments: list[dict], history: list[dict]) -> list[str]: return timeline -def bug_dict_to_markdown(bug): - md_lines = [] - is_trusted_cache: dict[str, bool] = {} +def bug_to_markdown(bug: "Bug") -> str: + """Convert a Bug object to markdown representation. - # Sanitize comments and history before processing - sanitized_comments, sanitized_history, filtered_comments, filtered_history = ( - _sanitize_timeline_items(bug["comments"], bug["history"], is_trusted_cache) - ) + Uses the bug's properties directly - sanitization is handled by the Bug + subclass (SanitizedBug) through property overrides. + """ + md_lines = [] - # Header with bug ID and summary - md_lines.append( - f"# Bug {bug.get('id', 'Unknown')} - {bug.get('summary', 'No summary')}" - ) + md_lines.append(f"# Bug {bug.id or 'Unknown'} - {bug.summary}") md_lines.append("") - # Basic Information md_lines.append("## Basic Information") - md_lines.append(f"- **Status**: {bug.get('status', 'Unknown')}") - md_lines.append(f"- **Severity**: {bug.get('severity', 'Unknown')}") - md_lines.append(f"- **Product**: {bug.get('product', 'Unknown')}") - md_lines.append(f"- **Component**: {bug.get('component', 'Unknown')}") - md_lines.append(f"- **Version**: {bug.get('version', 'Unknown')}") - md_lines.append(f"- **Platform**: {bug.get('platform', 'Unknown')}") - md_lines.append(f"- **OS**: {bug.get('op_sys', 'Unknown')}") - md_lines.append(f"- **Created**: {bug.get('creation_time', 'Unknown')}") - md_lines.append(f"- **Last Updated**: {bug.get('last_change_time', 'Unknown')}") - - if bug.get("url"): - md_lines.append(f"- **Related URL**: {bug['url']}") - - if bug.get("keywords"): - md_lines.append(f"- **Keywords**: {', '.join(bug['keywords'])}") + md_lines.append(f"- **Status**: {bug.status}") + md_lines.append(f"- **Severity**: {bug.severity}") + md_lines.append(f"- **Product**: {bug.product}") + md_lines.append(f"- **Component**: {bug.component}") + md_lines.append(f"- **Version**: {bug.version}") + md_lines.append(f"- **Platform**: {bug.platform}") + md_lines.append(f"- **OS**: {bug.op_sys}") + md_lines.append(f"- **Created**: {bug.creation_time}") + md_lines.append(f"- **Last Updated**: {bug.last_change_time}") + + if bug.url: + md_lines.append(f"- **Related URL**: {bug.url}") + + if bug.keywords: + md_lines.append(f"- **Keywords**: {', '.join(bug.keywords)}") md_lines.append("") - # People Involved md_lines.append("## People Involved") - creator_detail = bug.get("creator_detail", {}) - if creator_detail: - creator_name = creator_detail.get( - "real_name", - creator_detail.get("nick", creator_detail.get("email", "Unknown")), - ) - md_lines.append( - f"- **Reporter**: {creator_name} ({creator_detail.get('email', 'No email')})" - ) + if bug.reporter_display: + md_lines.append(bug.reporter_display) - assignee_detail = bug.get("assigned_to_detail", {}) - if assignee_detail: - assignee_name = assignee_detail.get( - "real_name", - assignee_detail.get("nick", assignee_detail.get("email", "Unknown")), - ) - md_lines.append( - f"- **Assignee**: {assignee_name} ({assignee_detail.get('email', 'No email')})" - ) + if bug.assignee_display: + md_lines.append(bug.assignee_display) - # CC List (summarized) - cc_count = len(bug.get("cc", [])) + cc_count = len(bug.cc) if cc_count > 0: md_lines.append(f"- **CC Count**: {cc_count} people") md_lines.append("") - # Dependencies and Relationships relationships = [] - if bug.get("blocks"): - relationships.append(f"**Blocks**: {', '.join(map(str, bug['blocks']))}") - if bug.get("depends_on"): + if bug.blocks: + relationships.append(f"**Blocks**: {', '.join(map(str, bug.blocks))}") + if bug.depends_on: + relationships.append(f"**Depends on**: {', '.join(map(str, bug.depends_on))}") + if bug.regressed_by: relationships.append( - f"**Depends on**: {', '.join(map(str, bug['depends_on']))}" + f"**Regressed by**: {', '.join(map(str, bug.regressed_by))}" ) - if bug.get("regressed_by"): - relationships.append( - f"**Regressed by**: {', '.join(map(str, bug['regressed_by']))}" - ) - if bug.get("duplicates"): - relationships.append( - f"**Duplicates**: {', '.join(map(str, bug['duplicates']))}" - ) - if bug.get("see_also"): - relationships.append(f"**See also**: {', '.join(bug['see_also'])}") + if bug.duplicates: + relationships.append(f"**Duplicates**: {', '.join(map(str, bug.duplicates))}") + if bug.see_also: + relationships.append(f"**See also**: {', '.join(bug.see_also)}") if relationships: md_lines.append("## Bug Relationships") @@ -369,8 +348,7 @@ def bug_dict_to_markdown(bug): md_lines.append(f"- {rel}") md_lines.append("") - # Use sanitized timeline - timeline = create_bug_timeline(sanitized_comments, sanitized_history) + timeline = create_bug_timeline(bug.timeline_comments, bug.timeline_history) if timeline: md_lines.append("## Bug Timeline") md_lines.append("") @@ -401,12 +379,196 @@ def get(bug_id: int) -> "Bug": bug_data = bugs[0] assert bug_data["id"] == bug_id - return Bug(bug_data) + return SanitizedBug(bug_data) + + @property + def id(self) -> int: + return self._metadata.get("id", 0) @property def summary(self) -> str: - return self._metadata["summary"] + return self._metadata.get("summary", "No summary") + + @property + def status(self) -> str: + return self._metadata.get("status", "Unknown") + + @property + def severity(self) -> str: + return self._metadata.get("severity", "Unknown") + + @property + def product(self) -> str: + return self._metadata.get("product", "Unknown") + + @property + def component(self) -> str: + return self._metadata.get("component", "Unknown") + + @property + def version(self) -> str: + return self._metadata.get("version", "Unknown") + + @property + def platform(self) -> str: + return self._metadata.get("platform", "Unknown") + + @property + def op_sys(self) -> str: + return self._metadata.get("op_sys", "Unknown") + + @property + def creation_time(self) -> str: + return self._metadata.get("creation_time", "Unknown") + + @property + def last_change_time(self) -> str: + return self._metadata.get("last_change_time", "Unknown") + + @property + def url(self) -> str: + return self._metadata.get("url", "") + + @property + def keywords(self) -> list[str]: + return self._metadata.get("keywords", []) + + @property + def cc(self) -> list[str]: + return self._metadata.get("cc", []) + + @property + def blocks(self) -> list[int]: + return self._metadata.get("blocks", []) + + @property + def depends_on(self) -> list[int]: + return self._metadata.get("depends_on", []) + + @property + def regressed_by(self) -> list[int]: + return self._metadata.get("regressed_by", []) + + @property + def duplicates(self) -> list[int]: + return self._metadata.get("duplicates", []) + + @property + def see_also(self) -> list[str]: + return self._metadata.get("see_also", []) + + @property + def comments(self) -> list[dict]: + return self._metadata.get("comments", []) + + @property + def history(self) -> list[dict]: + return self._metadata.get("history", []) + + @property + def creator_detail(self) -> dict: + return self._metadata.get("creator_detail", {}) + + @property + def assignee_detail(self) -> dict: + return self._metadata.get("assigned_to_detail", {}) + + @property + def reporter_display(self) -> str | None: + detail = self.creator_detail + if not detail: + return None + name = detail.get( + "real_name", detail.get("nick", detail.get("email", "Unknown")) + ) + email = detail.get("email", "No email") + return f"- **Reporter**: {name} ({email})" + + @property + def assignee_display(self) -> str | None: + detail = self.assignee_detail + if not detail: + return None + name = detail.get( + "real_name", detail.get("nick", detail.get("email", "Unknown")) + ) + email = detail.get("email", "No email") + return f"- **Assignee**: {name} ({email})" + + @property + def timeline_comments(self) -> list[dict]: + return self.comments + + @property + def timeline_history(self) -> list[dict]: + return self.history def to_md(self) -> str: """Return a markdown representation of the bug.""" - return bug_dict_to_markdown(self._metadata) + return bug_to_markdown(self) + + +class SanitizedBug(Bug): + """A Bug with untrusted content redacted based on trust policy.""" + + @cached_property + def _is_trusted_cache(self) -> dict[str, bool]: + all_emails = set() + for comment in self._metadata.get("comments", []): + email = comment.get("author", "") + if email: + all_emails.add(email) + for event in self._metadata.get("history", []): + email = event.get("who", "") + if email: + all_emails.add(email) + + if not all_emails: + return {} + + return _check_users_batch(list(all_emails)) + + @cached_property + def _has_trusted_comment(self) -> bool: + return any( + self._is_trusted_cache.get(comment.get("author", ""), False) + for comment in self._metadata.get("comments", []) + ) + + @cached_property + def _sanitized_timeline(self) -> tuple[list[dict], list[dict], int, int]: + return _sanitize_timeline_items( + self._metadata.get("comments", []), + self._metadata.get("history", []), + dict(self._is_trusted_cache), + ) + + @property + def summary(self) -> str: + if not self._has_trusted_comment: + return REDACTED_TITLE + return super().summary + + @property + def reporter_display(self) -> str | None: + if not self.creator_detail: + return None + if not self._has_trusted_comment: + return REDACTED_REPORTER + return super().reporter_display + + @property + def assignee_display(self) -> str | None: + if not self.assignee_detail: + return None + if not self._has_trusted_comment: + return REDACTED_ASSIGNEE + return super().assignee_display + + @property + def timeline_comments(self) -> list[dict]: + return self._sanitized_timeline[0] + + @property + def timeline_history(self) -> list[dict]: + return self._sanitized_timeline[1] diff --git a/tests/test_bugzilla_trusted_filtering.py b/tests/test_bugzilla_trusted_filtering.py index 49fbf9dc22..79ee5fdc13 100644 --- a/tests/test_bugzilla_trusted_filtering.py +++ b/tests/test_bugzilla_trusted_filtering.py @@ -659,8 +659,8 @@ def test_trusted_comment_validates_before_untrusted_history_after(): @responses.activate -def test_editbugs_with_recent_activity_is_trusted(): - """Test that users with editbugs and recent activity are trusted.""" +def test_extended_trusted_user_policy(): + """Test extended trusted user policy with mixed user types in one batch.""" from datetime import datetime, timezone from libmozdata.bugzilla import BugzillaBase @@ -669,87 +669,30 @@ def test_editbugs_with_recent_activity_is_trusted(): try: bugzilla_module.set_token("test_token") - # User with editbugs who was seen recently (today) recent_date = datetime.now(timezone.utc).isoformat() + stale_date = "2022-01-01T00:00:00Z" + responses.add( responses.GET, "https://bugzilla.mozilla.org/rest/user", json={ "users": [ { - "name": "editbugs@example.com", + "name": "editbugs-recent@example.com", "groups": [ {"id": 9, "name": "editbugs"}, {"id": 69, "name": "everyone"}, ], "last_seen_date": recent_date, - } - ], - "faults": [], - }, - status=200, - ) - - results = _check_users_batch(["editbugs@example.com"]) - assert results["editbugs@example.com"] is True - - finally: - BugzillaBase.TOKEN = old_token - - -@responses.activate -def test_editbugs_with_stale_activity_is_untrusted(): - """Test that users with editbugs but stale activity (>365 days) are not trusted.""" - from libmozdata.bugzilla import BugzillaBase - - old_token = BugzillaBase.TOKEN - try: - bugzilla_module.set_token("test_token") - - # User with editbugs who was last seen over a year ago - stale_date = "2022-01-01T00:00:00Z" - responses.add( - responses.GET, - "https://bugzilla.mozilla.org/rest/user", - json={ - "users": [ + }, { - "name": "stale@example.com", + "name": "editbugs-stale@example.com", "groups": [ {"id": 9, "name": "editbugs"}, {"id": 69, "name": "everyone"}, ], "last_seen_date": stale_date, - } - ], - "faults": [], - }, - status=200, - ) - - results = _check_users_batch(["stale@example.com"]) - assert results["stale@example.com"] is False - - finally: - BugzillaBase.TOKEN = old_token - - -@responses.activate -def test_moco_without_recent_activity_is_trusted(): - """Test that MOCO users are trusted regardless of activity date.""" - from libmozdata.bugzilla import BugzillaBase - - old_token = BugzillaBase.TOKEN - try: - bugzilla_module.set_token("test_token") - - # MOCO user with stale activity is still trusted - stale_date = "2022-01-01T00:00:00Z" - responses.add( - responses.GET, - "https://bugzilla.mozilla.org/rest/user", - json={ - "users": [ + }, { "name": "moco@mozilla.com", "groups": [ @@ -757,51 +700,157 @@ def test_moco_without_recent_activity_is_trusted(): {"id": 69, "name": "everyone"}, ], "last_seen_date": stale_date, - } + }, ], "faults": [], }, status=200, ) - results = _check_users_batch(["moco@mozilla.com"]) + results = _check_users_batch( + [ + "editbugs-recent@example.com", + "editbugs-stale@example.com", + "moco@mozilla.com", + ] + ) + + assert results["editbugs-recent@example.com"] is True + assert results["editbugs-stale@example.com"] is False assert results["moco@mozilla.com"] is True finally: BugzillaBase.TOKEN = old_token -@responses.activate -def test_editbugs_without_last_seen_is_untrusted(): - """Test that editbugs users without last_seen_date are not trusted.""" - from libmozdata.bugzilla import BugzillaBase +def test_metadata_redacted_without_trusted_comment(): + """Test that bug metadata is redacted when no trusted user has commented.""" + from unittest.mock import patch - old_token = BugzillaBase.TOKEN - try: - bugzilla_module.set_token("test_token") + from bugbug.tools.core.platforms.bugzilla import ( + REDACTED_ASSIGNEE, + REDACTED_REPORTER, + REDACTED_TITLE, + SanitizedBug, + ) - # User with editbugs but no last_seen_date field - responses.add( - responses.GET, - "https://bugzilla.mozilla.org/rest/user", - json={ - "users": [ - { - "name": "no_activity@example.com", - "groups": [ - {"id": 9, "name": "editbugs"}, - {"id": 69, "name": "everyone"}, - ], - # No last_seen_date field - } - ], - "faults": [], + bug_data = { + "id": 12345, + "summary": "This is the bug title", + "comments": [ + { + "time": "2024-01-01T10:00:00Z", + "author": "untrusted@example.com", + "id": 1, + "count": 0, + "text": "Untrusted comment", + } + ], + "history": [], + "status": "NEW", + "severity": "normal", + "product": "Core", + "component": "General", + "version": "unspecified", + "platform": "All", + "op_sys": "All", + "creation_time": "2024-01-01T00:00:00Z", + "last_change_time": "2024-01-01T10:00:00Z", + "creator_detail": { + "real_name": "Untrusted User", + "email": "untrusted@example.com", + }, + "assigned_to_detail": { + "real_name": "Assignee Name", + "email": "assignee@example.com", + }, + } + + # Mock the user check to make untrusted@example.com untrusted + with patch( + "bugbug.tools.core.platforms.bugzilla._check_users_batch", + return_value={"untrusted@example.com": False}, + ): + markdown = SanitizedBug(bug_data).to_md() + + # Title should be redacted + assert REDACTED_TITLE in markdown + assert "This is the bug title" not in markdown + + # Reporter should be redacted + assert REDACTED_REPORTER in markdown + assert "Untrusted User" not in markdown + + # Assignee should be redacted + assert REDACTED_ASSIGNEE in markdown + assert "Assignee Name" not in markdown + + +def test_metadata_shown_with_trusted_comment(): + """Test that bug metadata is shown when a trusted user has commented.""" + from unittest.mock import patch + + from bugbug.tools.core.platforms.bugzilla import ( + REDACTED_ASSIGNEE, + REDACTED_REPORTER, + REDACTED_TITLE, + SanitizedBug, + ) + + bug_data = { + "id": 12345, + "summary": "This is the bug title", + "comments": [ + { + "time": "2024-01-01T10:00:00Z", + "author": "untrusted@example.com", + "id": 1, + "count": 0, + "text": "Untrusted comment", }, - status=200, - ) + { + "time": "2024-01-01T10:01:00Z", + "author": "trusted@mozilla.com", + "id": 2, + "count": 1, + "text": "Trusted comment", + }, + ], + "history": [], + "status": "NEW", + "severity": "normal", + "product": "Core", + "component": "General", + "version": "unspecified", + "platform": "All", + "op_sys": "All", + "creation_time": "2024-01-01T00:00:00Z", + "last_change_time": "2024-01-01T10:01:00Z", + "creator_detail": { + "real_name": "Untrusted User", + "email": "untrusted@example.com", + }, + "assigned_to_detail": { + "real_name": "Assignee Name", + "email": "assignee@example.com", + }, + } - results = _check_users_batch(["no_activity@example.com"]) - assert results["no_activity@example.com"] is False + # Mock the user check to make trusted@mozilla.com trusted + with patch( + "bugbug.tools.core.platforms.bugzilla._check_users_batch", + return_value={"untrusted@example.com": False, "trusted@mozilla.com": True}, + ): + markdown = SanitizedBug(bug_data).to_md() - finally: - BugzillaBase.TOKEN = old_token + # Title should be shown + assert "This is the bug title" in markdown + assert REDACTED_TITLE not in markdown + + # Reporter should be shown + assert "Untrusted User" in markdown + assert REDACTED_REPORTER not in markdown + + # Assignee should be shown + assert "Assignee Name" in markdown + assert REDACTED_ASSIGNEE not in markdown From 99cb867f8cdb3895cd6f466eee2aca7d25e6bc62 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Tue, 27 Jan 2026 19:11:37 +0100 Subject: [PATCH 04/26] Redact untrusted Phabricator metadata Refactor metadata filtering into a SanitizedPhabricatorPatch class that inherits from PhabricatorPatch and overrides properties to return sanitized values when no trusted user has commented. When there's no trusted comment, the following are redacted: - Revision title - Author name and email - Summary and test plan - Diff author (if different from revision author) - Stack graph titles The trust check uses cached_property to avoid redundant API calls. PhabricatorReviewData.get_patch_by_id() now returns SanitizedPhabricatorPatch. --- bugbug/tools/core/platforms/phabricator.py | 245 +++++++--- tests/test_phabricator_trusted_filtering.py | 473 +++++++++++++++++++- 2 files changed, 643 insertions(+), 75 deletions(-) diff --git a/bugbug/tools/core/platforms/phabricator.py b/bugbug/tools/core/platforms/phabricator.py index 7a01099978..293bca0bf9 100644 --- a/bugbug/tools/core/platforms/phabricator.py +++ b/bugbug/tools/core/platforms/phabricator.py @@ -26,8 +26,12 @@ # Trusted users group PHID (currently defined as MOCO group members) MOCO_GROUP_PHID = "PHID-PROJ-a2zxxknk7jm5nw4rtjsl" # bmo-mozilla-employee-confidential -# Message used when redacting untrusted content +# Messages used when redacting untrusted content UNTRUSTED_CONTENT_REDACTED = "[Content from untrusted user removed for security]" +REDACTED_TITLE = "[Unvalidated revision title redacted for security]" +REDACTED_AUTHOR = "[Redacted]" +REDACTED_SUMMARY = "[Unvalidated summary redacted for security]" +REDACTED_TEST_PLAN = "[Unvalidated test plan redacted for security]" @cache @@ -459,6 +463,72 @@ def patch_description(self) -> str: def revision_id(self) -> int: return self._revision_metadata["id"] + @property + def revision_uri(self) -> str: + return self._revision_metadata["fields"]["uri"] + + @property + def revision_status(self) -> str: + return self._revision_metadata["fields"]["status"]["name"] + + @property + def author_phid(self) -> str: + return self._revision_metadata["fields"]["authorPHID"] + + @property + def diff_author_phid(self) -> str: + return self._diff_metadata["authorPHID"] + + @property + def stack_graph(self) -> dict: + return self._revision_metadata["fields"].get("stackGraph", {}) + + @cached_property + def _all_comments(self) -> list: + return [c for c in self.get_comments() if c.content.strip()] + + @cached_property + def _users_info(self) -> dict[str, dict]: + user_phids = {c.author_phid for c in self._all_comments} | {self.author_phid} + if self.author_phid != self.diff_author_phid: + user_phids.add(self.diff_author_phid) + return _get_users_info_batch(user_phids) + + def _format_user_display(self, user_phid: str) -> str: + info = self._users_info.get(user_phid, {}) + email = info.get("email", "Unknown") + real_name = info.get("real_name", "") + if real_name: + return f"{real_name} ({email})" + return email + + @property + def author_display(self) -> str: + return self._format_user_display(self.author_phid) + + @property + def diff_author_display(self) -> str | None: + if self.author_phid == self.diff_author_phid: + return None + return self._format_user_display(self.diff_author_phid) + + @property + def summary_display(self) -> str | None: + return self._revision_metadata["fields"].get("summary") or None + + @property + def test_plan_display(self) -> str | None: + return self._revision_metadata["fields"].get("testPlan") or None + + @property + def timeline_comments(self) -> list: + return sorted(self._all_comments, key=lambda c: c.date_created) + + def get_stack_patch_title(self, phid: str) -> str: + if phid == self._revision_metadata["phid"]: + return self.patch_title + return PhabricatorPatch(revision_phid=phid).patch_title + def _get_transactions(self) -> list[dict]: phabricator = get_phabricator_client() @@ -484,41 +554,40 @@ def to_md(self) -> str: Returns a well-structured markdown document that includes revision metadata, diff information, stack information, code changes, and comments. + + Sanitization is handled by property overrides in SanitizedPhabricatorPatch. """ date_format = "%Y-%m-%d %H:%M:%S" md_lines = [] - revision = self._revision_metadata - md_lines.append(f"# Revision D{revision['id']}: {revision['fields']['title']}") + md_lines.append(f"# Revision D{self.revision_id}: {self.patch_title}") md_lines.append("") md_lines.append("") md_lines.append("## Basic Information") md_lines.append("") - md_lines.append(f"- **URI**: {revision['fields']['uri']}") - md_lines.append(f"- **Revision Author**: {revision['fields']['authorPHID']}") - md_lines.append(f"- **Title**: {revision['fields']['title']}") - md_lines.append(f"- **Status**: {revision['fields']['status']['name']}") + md_lines.append(f"- **URI**: {self.revision_uri}") + md_lines.append(f"- **Revision Author**: {self.author_display}") + md_lines.append(f"- **Title**: {self.patch_title}") + md_lines.append(f"- **Status**: {self.revision_status}") md_lines.append(f"- **Created**: {self.date_created.strftime(date_format)}") md_lines.append(f"- **Modified**: {self.date_modified.strftime(date_format)}") - bug_id = revision["fields"].get("bugzilla.bug-id") or "N/A" + bug_id = self._revision_metadata["fields"].get("bugzilla.bug-id") or "N/A" md_lines.append(f"- **Bugzilla Bug**: {bug_id}") md_lines.append("") md_lines.append("") - summary = revision["fields"].get("summary") - if summary: + if self.summary_display: md_lines.append("## Summary") md_lines.append("") - md_lines.append(summary) + md_lines.append(self.summary_display) md_lines.append("") md_lines.append("") - test_plan = revision["fields"].get("testPlan") - if test_plan: + if self.test_plan_display: md_lines.append("## Test Plan") md_lines.append("") - md_lines.append(test_plan) + md_lines.append(self.test_plan_display) md_lines.append("") md_lines.append("") @@ -526,13 +595,12 @@ def to_md(self) -> str: diff = self._diff_metadata md_lines.append(f"- **Diff ID**: {diff['id']}") md_lines.append(f"- **Base Revision**: `{diff['baseRevision']}`") - if revision["fields"]["authorPHID"] != diff["authorPHID"]: - md_lines.append(f"- **Diff Author**: {diff['authorPHID']}") + if self.diff_author_display: + md_lines.append(f"- **Diff Author**: {self.diff_author_display}") md_lines.append("") md_lines.append("") - stack_graph = revision["fields"].get("stackGraph") - if len(stack_graph) > 1: + if len(self.stack_graph) > 1: md_lines.append("## Stack Information") md_lines.append("") md_lines.append("**Dependency Graph**:") @@ -540,29 +608,28 @@ def to_md(self) -> str: md_lines.append("```mermaid") md_lines.append("graph TD") - current_phid = revision["phid"] - patch_map = { - phid: ( - self - if phid == current_phid - else PhabricatorPatch(revision_phid=phid) - ) - for phid in stack_graph.keys() - } + current_phid = self._revision_metadata["phid"] + revision_ids = {} + for phid in self.stack_graph.keys(): + if phid == current_phid: + revision_ids[phid] = self.revision_id + else: + revision_ids[phid] = PhabricatorPatch( + revision_phid=phid + ).revision_id + + for phid, dependencies in self.stack_graph.items(): + from_id = f"D{revision_ids[phid]}" + stack_title = self.get_stack_patch_title(phid) - for phid, dependencies in stack_graph.items(): - from_patch = patch_map[phid] - from_id = f"D{from_patch.revision_id}" if phid == current_phid: - md_lines.append( - f" {from_id}[{from_patch.patch_title} - CURRENT]" - ) + md_lines.append(f" {from_id}[{stack_title} - CURRENT]") md_lines.append(f" style {from_id} fill:#105823") else: - md_lines.append(f" {from_id}[{from_patch.patch_title}]") + md_lines.append(f" {from_id}[{stack_title}]") for dep_phid in dependencies: - dep_id = f"D{patch_map[dep_phid].revision_id}" + dep_id = f"D{revision_ids[dep_phid]}" md_lines.append(f" {from_id} --> {dep_id}") md_lines.append("```") @@ -586,66 +653,38 @@ def to_md(self) -> str: md_lines.append("## Comments Timeline") md_lines.append("") - # Get all comments and sort by date - all_comments = sorted( - # Ignore empty comments - (comment for comment in self.get_comments() if comment.content.strip()), - key=lambda c: c.date_created, - ) - - author_phid = revision["fields"]["authorPHID"] - user_phids = {comment.author_phid for comment in all_comments} | {author_phid} - - users_info = _get_users_info_batch(user_phids) - - comments_to_display, filtered_count = _sanitize_comments( - all_comments, users_info - ) - - for comment in comments_to_display: + for comment in self.timeline_comments: date = datetime.fromtimestamp(comment.date_created) date_str = date.strftime(date_format) - author_info = users_info.get(comment.author_phid, {}) if comment.content_redacted: - # After last trusted: redact author name too - author_display = "[Untrusted User]" + comment_author_display = "[Untrusted User]" else: - # Before/at last trusted: show real name for everyone - email = author_info.get("email", "Unknown User") - real_name = author_info.get("real_name") - if real_name: - author_display = f"{real_name} ({email})" - else: - author_display = email + comment_author_display = self._format_user_display(comment.author_phid) if isinstance(comment, PhabricatorInlineComment): - line_length = comment.line_length - end_line = comment.end_line line_info = ( f"Line {comment.start_line}" - if line_length == 1 - else f"Lines {comment.start_line}-{end_line}" + if comment.line_length == 1 + else f"Lines {comment.start_line}-{comment.end_line}" ) done_status = " [RESOLVED]" if comment.is_done else "" generated_status = " [AI-GENERATED]" if comment.is_generated else "" md_lines.append( - f"**{date_str}** - **Inline Comment** by {author_display} on `{comment.filename}` " + f"**{date_str}** - **Inline Comment** by {comment_author_display} on `{comment.filename}` " f"at {line_info}{done_status}{generated_status}" ) else: md_lines.append( - f"**{date_str}** - **General Comment** by {author_display}" + f"**{date_str}** - **General Comment** by {comment_author_display}" ) final_comment_content = comment.content divider_index = final_comment_content.find("---") if divider_index != -1: - # Remove footer notes that usually added by reviewbot final_comment_content = final_comment_content[:divider_index].strip() - # Truncate very long comments to avoid overloading the LLM if len(final_comment_content) > 2000: final_comment_content = ( final_comment_content[:2000] + "...\n\n*[Content truncated]*" @@ -657,13 +696,75 @@ def to_md(self) -> str: md_lines.append("---") md_lines.append("") - if not all_comments: + if not self._all_comments: md_lines.append("*No comments*") md_lines.append("") return "\n".join(md_lines) +class SanitizedPhabricatorPatch(PhabricatorPatch): + """A PhabricatorPatch with untrusted content redacted based on trust policy.""" + + @cached_property + def _has_trusted_comment(self) -> bool: + return any( + self._users_info.get(c.author_phid, {}).get("is_trusted", False) + for c in self._all_comments + ) + + @cached_property + def patch_title(self) -> str: + if not self._has_trusted_comment: + return REDACTED_TITLE + return self._revision_metadata["fields"]["title"] + + @property + def author_display(self) -> str: + if not self._has_trusted_comment: + return REDACTED_AUTHOR + return super().author_display + + @property + def diff_author_display(self) -> str | None: + if self.author_phid == self.diff_author_phid: + return None + if not self._has_trusted_comment: + return REDACTED_AUTHOR + return super().diff_author_display + + @property + def summary_display(self) -> str | None: + summary = self._revision_metadata["fields"].get("summary") + if not summary: + return None + if not self._has_trusted_comment: + return REDACTED_SUMMARY + return summary + + @property + def test_plan_display(self) -> str | None: + test_plan = self._revision_metadata["fields"].get("testPlan") + if not test_plan: + return None + if not self._has_trusted_comment: + return REDACTED_TEST_PLAN + return test_plan + + @cached_property + def timeline_comments(self) -> list: + sorted_comments = sorted(self._all_comments, key=lambda c: c.date_created) + sanitized, _ = _sanitize_comments(sorted_comments, self._users_info) + return sanitized + + def get_stack_patch_title(self, phid: str) -> str: + if not self._has_trusted_comment: + return REDACTED_TITLE + if phid == self._revision_metadata["phid"]: + return self.patch_title + return PhabricatorPatch(revision_phid=phid).patch_title + + class PhabricatorReviewData(ReviewData): @tenacity.retry( stop=tenacity.stop_after_attempt(7), @@ -671,7 +772,7 @@ class PhabricatorReviewData(ReviewData): reraise=True, ) def get_patch_by_id(self, patch_id: str | int) -> Patch: - return PhabricatorPatch(patch_id) + return SanitizedPhabricatorPatch(patch_id) def get_all_inline_comments( self, comment_filter diff --git a/tests/test_phabricator_trusted_filtering.py b/tests/test_phabricator_trusted_filtering.py index 0f7689d116..969f53ba91 100644 --- a/tests/test_phabricator_trusted_filtering.py +++ b/tests/test_phabricator_trusted_filtering.py @@ -11,7 +11,7 @@ UNTRUSTED_CONTENT_REDACTED, PhabricatorGeneralComment, PhabricatorInlineComment, - PhabricatorPatch, + SanitizedPhabricatorPatch, _get_users_info_batch, _sanitize_comments, ) @@ -373,7 +373,7 @@ def test_to_md_filters_untrusted_content_after_last_trusted(self): "bugbug.tools.core.platforms.phabricator.get_phabricator_client", return_value=mock_api, ): - patch_obj = PhabricatorPatch(diff_id=123456) + patch_obj = SanitizedPhabricatorPatch(diff_id=123456) md_output = patch_obj.to_md() # Trusted content should be visible @@ -442,13 +442,263 @@ def mock_request(method, **kwargs): "bugbug.tools.core.platforms.phabricator.get_phabricator_client", return_value=mock_api, ): - patch_obj = PhabricatorPatch(diff_id=123456) + patch_obj = SanitizedPhabricatorPatch(diff_id=123456) md_output = patch_obj.to_md() assert "All trusted here" in md_output assert UNTRUSTED_CONTENT_REDACTED not in md_output assert "Phabricator Automation (phab-bot)" in md_output + def test_to_md_stack_titles_redacted_without_trusted(self): + """Test that stack dependency titles are redacted when no trusted user commented.""" + from bugbug.tools.core.platforms.phabricator import REDACTED_TITLE + + # Revision with a stack: current depends on parent + mock_revision_with_stack = { + "id": 999999, + "type": "DREV", + "phid": "PHID-DREV-current", + "fields": { + "title": "SENSITIVE Current Revision Title", + "uri": "https://phabricator.services.mozilla.com/D999999", + "authorPHID": UNTRUSTED_PHID, + "status": {"value": "needs-review", "name": "Needs Review"}, + "diffPHID": "PHID-DIFF-testdiff", + "diffID": "123456", + "summary": "SENSITIVE summary content", + "testPlan": "", + "dateCreated": 1700000000, + "dateModified": 1700000100, + "bugzilla.bug-id": "1234567", + "stackGraph": { + "PHID-DREV-current": ["PHID-DREV-parent"], + "PHID-DREV-parent": [], + }, + }, + "attachments": {}, + } + + mock_parent_revision = { + "id": 999998, + "type": "DREV", + "phid": "PHID-DREV-parent", + "fields": { + "title": "SENSITIVE Parent Revision Title", + "uri": "https://phabricator.services.mozilla.com/D999998", + "authorPHID": UNTRUSTED_PHID, + "status": {"value": "needs-review", "name": "Needs Review"}, + "diffPHID": "PHID-DIFF-parentdiff", + "diffID": "123455", + "summary": "", + "testPlan": "", + "dateCreated": 1699999000, + "dateModified": 1699999100, + "bugzilla.bug-id": "1234567", + "stackGraph": { + "PHID-DREV-current": ["PHID-DREV-parent"], + "PHID-DREV-parent": [], + }, + }, + "attachments": {}, + } + + # Only untrusted comments - no trusted validation + mock_transactions_untrusted_only = { + "data": [ + { + "id": 1, + "type": "comment", + "authorPHID": UNTRUSTED_PHID, + "dateCreated": 1700000010, + "dateModified": 1700000010, + "comments": [ + { + "id": 101, + "phid": "PHID-XCMT-1", + "dateCreated": 1700000010, + "dateModified": 1700000010, + "content": {"raw": "Untrusted comment"}, + } + ], + "fields": {}, + }, + ], + } + + def mock_request(method, **kwargs): + if method == "transaction.search": + return mock_transactions_untrusted_only + elif method == "user.search": + return self.MOCK_USERS + elif method == "project.search": + return self.MOCK_MOCO_GROUP + raise ValueError(f"Unexpected API call: {method}") + + def mock_load_revision(rev_phid=None, rev_id=None): + if rev_phid == "PHID-DREV-parent": + return mock_parent_revision + return mock_revision_with_stack + + mock_api = MagicMock() + mock_api.request = mock_request + mock_api.search_diffs = MagicMock(return_value=self.MOCK_DIFF) + mock_api.load_revision = mock_load_revision + mock_api.load_raw_diff = MagicMock( + return_value="diff --git a/test.py b/test.py\n+test" + ) + + with patch( + "bugbug.tools.core.platforms.phabricator.get_phabricator_client", + return_value=mock_api, + ): + patch_obj = SanitizedPhabricatorPatch(diff_id=123456) + md_output = patch_obj.to_md() + + # Verify stack section exists + assert "## Stack Information" in md_output + assert "```mermaid" in md_output + + # SENSITIVE titles must NOT appear anywhere in output + assert "SENSITIVE Current Revision Title" not in md_output + assert "SENSITIVE Parent Revision Title" not in md_output + assert "SENSITIVE summary content" not in md_output + + # Redacted title must appear (for both current and parent in stack) + assert REDACTED_TITLE in md_output + + # Extract mermaid block and verify all titles are redacted + lines = md_output.split("\n") + in_mermaid = False + mermaid_lines = [] + for line in lines: + if "```mermaid" in line: + in_mermaid = True + continue + if in_mermaid and line.strip() == "```": + break + if in_mermaid: + mermaid_lines.append(line) + + # Check that node definitions have redacted titles + for line in mermaid_lines: + if "D999999[" in line or "D999998[" in line: + assert REDACTED_TITLE in line or "CURRENT" in line + + def test_to_md_stack_titles_shown_with_trusted(self): + """Test that stack titles are shown when a trusted user has commented.""" + from bugbug.tools.core.platforms.phabricator import REDACTED_TITLE + + # Revision with a stack: current depends on parent + mock_revision_with_stack = { + "id": 999999, + "type": "DREV", + "phid": "PHID-DREV-current", + "fields": { + "title": "Current Revision Title", + "uri": "https://phabricator.services.mozilla.com/D999999", + "authorPHID": UNTRUSTED_PHID, + "status": {"value": "needs-review", "name": "Needs Review"}, + "diffPHID": "PHID-DIFF-testdiff", + "diffID": "123456", + "summary": "Summary content", + "testPlan": "", + "dateCreated": 1700000000, + "dateModified": 1700000100, + "bugzilla.bug-id": "1234567", + "stackGraph": { + "PHID-DREV-current": ["PHID-DREV-parent"], + "PHID-DREV-parent": [], + }, + }, + "attachments": {}, + } + + mock_parent_revision = { + "id": 999998, + "type": "DREV", + "phid": "PHID-DREV-parent", + "fields": { + "title": "Parent Revision Title", + "uri": "https://phabricator.services.mozilla.com/D999998", + "authorPHID": UNTRUSTED_PHID, + "status": {"value": "needs-review", "name": "Needs Review"}, + "diffPHID": "PHID-DIFF-parentdiff", + "diffID": "123455", + "summary": "", + "testPlan": "", + "dateCreated": 1699999000, + "dateModified": 1699999100, + "bugzilla.bug-id": "1234567", + "stackGraph": { + "PHID-DREV-current": ["PHID-DREV-parent"], + "PHID-DREV-parent": [], + }, + }, + "attachments": {}, + } + + # Trusted comment validates the content + mock_transactions_with_trusted = { + "data": [ + { + "id": 1, + "type": "comment", + "authorPHID": PHAB_BOT_PHID, # Trusted user + "dateCreated": 1700000010, + "dateModified": 1700000010, + "comments": [ + { + "id": 101, + "phid": "PHID-XCMT-1", + "dateCreated": 1700000010, + "dateModified": 1700000010, + "content": {"raw": "LGTM"}, + } + ], + "fields": {}, + }, + ], + } + + def mock_request(method, **kwargs): + if method == "transaction.search": + return mock_transactions_with_trusted + elif method == "user.search": + return self.MOCK_USERS + elif method == "project.search": + return self.MOCK_MOCO_GROUP + raise ValueError(f"Unexpected API call: {method}") + + def mock_load_revision(rev_phid=None, rev_id=None): + if rev_phid == "PHID-DREV-parent": + return mock_parent_revision + return mock_revision_with_stack + + mock_api = MagicMock() + mock_api.request = mock_request + mock_api.search_diffs = MagicMock(return_value=self.MOCK_DIFF) + mock_api.load_revision = mock_load_revision + mock_api.load_raw_diff = MagicMock( + return_value="diff --git a/test.py b/test.py\n+test" + ) + + with patch( + "bugbug.tools.core.platforms.phabricator.get_phabricator_client", + return_value=mock_api, + ): + patch_obj = SanitizedPhabricatorPatch(diff_id=123456) + md_output = patch_obj.to_md() + + # Verify stack section exists + assert "## Stack Information" in md_output + + # Titles SHOULD appear when trusted user has commented + assert "Current Revision Title" in md_output + assert "Parent Revision Title" in md_output + + # Redacted title should NOT appear + assert REDACTED_TITLE not in md_output + # Subsequent test rely on having an API key present, and perform testing against # the live phabricator instance. They can be helpful to validate changes @@ -583,3 +833,220 @@ def test_moco_group_phid_is_valid(): assert len(resp["data"]) == 1 assert resp["data"][0]["phid"] == MOCO_GROUP_PHID assert "bmo-mozilla-employee-confidential" in resp["data"][0]["fields"]["name"] + + +def test_phabricator_metadata_redacted_without_trusted_comment(): + """Test that revision metadata is redacted when no trusted user has commented.""" + from unittest.mock import patch + + from bugbug.tools.core.platforms.phabricator import SanitizedPhabricatorPatch + + # Mock the revision and diff metadata + mock_revision = { + "id": 12345, + "phid": "PHID-DREV-test123", + "fields": { + "title": "This is the revision title", + "authorPHID": "PHID-USER-untrusted", + "status": {"name": "Needs Review"}, + "uri": "https://phabricator.services.mozilla.com/D12345", + "bugzilla.bug-id": "123456", + "summary": "This is the detailed summary", + "testPlan": "This is the test plan", + "stackGraph": {}, + }, + } + + mock_diff = { + "id": 54321, + "dateCreated": 1704110400, + "dateModified": 1704110400, + "baseRevision": "abc123", + "authorPHID": "PHID-USER-untrusted", + } + + # Mock users info: no trusted users + mock_users_info = { + "PHID-USER-untrusted": { + "email": "untrusted@example.com", + "is_trusted": False, + "real_name": "Untrusted User", + } + } + + with ( + patch.object(SanitizedPhabricatorPatch, "_revision_metadata", mock_revision), + patch.object(SanitizedPhabricatorPatch, "_diff_metadata", mock_diff), + patch.object(SanitizedPhabricatorPatch, "get_comments", return_value=[]), + patch( + "bugbug.tools.core.platforms.phabricator._get_users_info_batch", + return_value=mock_users_info, + ), + patch.object(SanitizedPhabricatorPatch, "raw_diff", "diff content"), + ): + patch_obj = SanitizedPhabricatorPatch(diff_id=54321) + markdown = patch_obj.to_md() + + # Title should be redacted + assert "[Unvalidated revision title redacted for security]" in markdown + assert "This is the revision title" not in markdown + + # Author should be redacted + assert "**Revision Author**: [Redacted]" in markdown + assert "Untrusted User" not in markdown + + # Summary should be redacted + assert "[Unvalidated summary redacted for security]" in markdown + assert "This is the detailed summary" not in markdown + + # Test plan should be redacted + assert "[Unvalidated test plan redacted for security]" in markdown + assert "This is the test plan" not in markdown + + +def test_phabricator_metadata_shown_with_trusted_comment(): + """Test that revision metadata is shown when a trusted user has commented.""" + from unittest.mock import Mock, patch + + from bugbug.tools.core.platforms.phabricator import ( + PhabricatorGeneralComment, + SanitizedPhabricatorPatch, + ) + + # Mock the revision and diff metadata + mock_revision = { + "id": 12345, + "phid": "PHID-DREV-test123", + "fields": { + "title": "This is the revision title", + "authorPHID": "PHID-USER-author", + "status": {"name": "Needs Review"}, + "uri": "https://phabricator.services.mozilla.com/D12345", + "bugzilla.bug-id": "123456", + "summary": "This is the detailed summary", + "testPlan": "This is the test plan", + "stackGraph": {}, + }, + } + + mock_diff = { + "id": 54321, + "dateCreated": 1704110400, + "dateModified": 1704110400, + "baseRevision": "abc123", + "authorPHID": "PHID-USER-author", + } + + # Mock a trusted comment + mock_comment = Mock(spec=PhabricatorGeneralComment) + mock_comment.content = "LGTM" + mock_comment.author_phid = "PHID-USER-trusted" + mock_comment.date_created = 1704110500 + mock_comment.content_redacted = False + + # Mock users info: one trusted user + mock_users_info = { + "PHID-USER-author": { + "email": "author@example.com", + "is_trusted": False, + "real_name": "Patch Author", + }, + "PHID-USER-trusted": { + "email": "trusted@mozilla.com", + "is_trusted": True, + "real_name": "Trusted Reviewer", + }, + } + + with ( + patch.object(SanitizedPhabricatorPatch, "_revision_metadata", mock_revision), + patch.object(SanitizedPhabricatorPatch, "_diff_metadata", mock_diff), + patch.object( + SanitizedPhabricatorPatch, "get_comments", return_value=[mock_comment] + ), + patch( + "bugbug.tools.core.platforms.phabricator._get_users_info_batch", + return_value=mock_users_info, + ), + patch.object(SanitizedPhabricatorPatch, "raw_diff", "diff content"), + patch( + "bugbug.tools.core.platforms.phabricator._sanitize_comments", + return_value=([mock_comment], 0), + ), + ): + patch_obj = SanitizedPhabricatorPatch(diff_id=54321) + markdown = patch_obj.to_md() + + # Title should be shown + assert "This is the revision title" in markdown + assert "[Unvalidated revision title redacted for security]" not in markdown + + # Author should be shown + assert "Patch Author (author@example.com)" in markdown + assert "**Revision Author**: [Redacted]" not in markdown + + # Summary should be shown + assert "This is the detailed summary" in markdown + assert "[Unvalidated summary redacted for security]" not in markdown + + # Test plan should be shown + assert "This is the test plan" in markdown + assert "[Unvalidated test plan redacted for security]" not in markdown + + +def test_phabricator_stack_titles_redacted(): + """Test that stack dependency graph titles are redacted without trusted comment.""" + from unittest.mock import patch + + from bugbug.tools.core.platforms.phabricator import ( + REDACTED_TITLE, + SanitizedPhabricatorPatch, + ) + + # Mock the revision + mock_revision = { + "id": 12345, + "phid": "PHID-DREV-current", + "fields": { + "title": "Current revision", + "authorPHID": "PHID-USER-author", + "status": {"name": "Needs Review"}, + "uri": "https://phabricator.services.mozilla.com/D12345", + "bugzilla.bug-id": "123456", + "stackGraph": {}, + }, + } + + mock_diff = { + "id": 54321, + "dateCreated": 1704110400, + "dateModified": 1704110400, + "baseRevision": "abc123", + "authorPHID": "PHID-USER-author", + } + + mock_users_info = { + "PHID-USER-author": { + "email": "author@example.com", + "is_trusted": False, + "real_name": "Author", + } + } + + with ( + patch.object(SanitizedPhabricatorPatch, "_revision_metadata", mock_revision), + patch.object(SanitizedPhabricatorPatch, "_diff_metadata", mock_diff), + patch.object(SanitizedPhabricatorPatch, "get_comments", return_value=[]), + patch( + "bugbug.tools.core.platforms.phabricator._get_users_info_batch", + return_value=mock_users_info, + ), + ): + patch_obj = SanitizedPhabricatorPatch(diff_id=54321) + + # When there's no trusted comment, get_stack_patch_title should return redacted + title = patch_obj.get_stack_patch_title("PHID-DREV-current") + assert title == REDACTED_TITLE + + # Also verify patch_title property is redacted + assert patch_obj.patch_title == REDACTED_TITLE From 5242dfce05e19d6be330f9120c9e105fab838ad3 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Fri, 6 Feb 2026 14:09:29 +0100 Subject: [PATCH 05/26] Disregard comments containing more tags This is now the full list from BMO --- bugbug/tools/core/platforms/bugzilla.py | 128 +++++++++--------------- 1 file changed, 45 insertions(+), 83 deletions(-) diff --git a/bugbug/tools/core/platforms/bugzilla.py b/bugbug/tools/core/platforms/bugzilla.py index a39c3df17f..2e1ba171b2 100644 --- a/bugbug/tools/core/platforms/bugzilla.py +++ b/bugbug/tools/core/platforms/bugzilla.py @@ -22,6 +22,30 @@ REDACTED_REPORTER = "- **Reporter**: [Redacted]" REDACTED_ASSIGNEE = "- **Assignee**: [Redacted]" +COLLAPSED_COMMENT_TAGS = { + "abuse-reviewed", + "abusive-reviewed", + "admin-reviewed", + "obsolete", + "spam", + "me-too", + "typo", + "metoo", + "advocacy", + "off-topic", + "offtopic", + "abuse", + "abusive", + "mozreview-request", + "about-support", + "duplicate", + "empty", + "collapsed", + "admin", + "hide", + "nsfw", +} + BugzillaBase.TOKEN = os.getenv("BUGZILLA_TOKEN") @@ -163,7 +187,7 @@ def _sanitize_timeline_items( for comment in comments: tags = comment.get("tags", []) - if any(tag in ["spam", "off-topic"] for tag in tags): + if any(tag in COLLAPSED_COMMENT_TAGS for tag in tags): continue email = comment.get("author", "") @@ -363,6 +387,24 @@ class Bug: def __init__(self, data: dict): self._metadata = data + def __getattr__(self, name: str): + if name.startswith("_"): + raise AttributeError( + f"'{type(self).__name__}' object has no attribute '{name}'" + ) + value = self._metadata.get(name) + if value is None and name in ( + "keywords", + "cc", + "blocks", + "depends_on", + "regressed_by", + "duplicates", + "see_also", + ): + return [] + return value + @staticmethod def get(bug_id: int) -> "Bug": bugs: list[dict] = [] @@ -381,90 +423,10 @@ def get(bug_id: int) -> "Bug": return SanitizedBug(bug_data) - @property - def id(self) -> int: - return self._metadata.get("id", 0) - @property def summary(self) -> str: return self._metadata.get("summary", "No summary") - @property - def status(self) -> str: - return self._metadata.get("status", "Unknown") - - @property - def severity(self) -> str: - return self._metadata.get("severity", "Unknown") - - @property - def product(self) -> str: - return self._metadata.get("product", "Unknown") - - @property - def component(self) -> str: - return self._metadata.get("component", "Unknown") - - @property - def version(self) -> str: - return self._metadata.get("version", "Unknown") - - @property - def platform(self) -> str: - return self._metadata.get("platform", "Unknown") - - @property - def op_sys(self) -> str: - return self._metadata.get("op_sys", "Unknown") - - @property - def creation_time(self) -> str: - return self._metadata.get("creation_time", "Unknown") - - @property - def last_change_time(self) -> str: - return self._metadata.get("last_change_time", "Unknown") - - @property - def url(self) -> str: - return self._metadata.get("url", "") - - @property - def keywords(self) -> list[str]: - return self._metadata.get("keywords", []) - - @property - def cc(self) -> list[str]: - return self._metadata.get("cc", []) - - @property - def blocks(self) -> list[int]: - return self._metadata.get("blocks", []) - - @property - def depends_on(self) -> list[int]: - return self._metadata.get("depends_on", []) - - @property - def regressed_by(self) -> list[int]: - return self._metadata.get("regressed_by", []) - - @property - def duplicates(self) -> list[int]: - return self._metadata.get("duplicates", []) - - @property - def see_also(self) -> list[str]: - return self._metadata.get("see_also", []) - - @property - def comments(self) -> list[dict]: - return self._metadata.get("comments", []) - - @property - def history(self) -> list[dict]: - return self._metadata.get("history", []) - @property def creator_detail(self) -> dict: return self._metadata.get("creator_detail", {}) @@ -497,11 +459,11 @@ def assignee_display(self) -> str | None: @property def timeline_comments(self) -> list[dict]: - return self.comments + return self._metadata.get("comments", []) @property def timeline_history(self) -> list[dict]: - return self.history + return self._metadata.get("history", []) def to_md(self) -> str: """Return a markdown representation of the bug.""" From bc0a1079a1aecdd0a43d1b8d8be2453197c480aa Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Fri, 6 Feb 2026 14:10:35 +0100 Subject: [PATCH 06/26] Trust all comments before 2022-01-01 Automatically trust comments created before 2022 as prompt injection was not a concern at that time. This applies to both comment content validation and metadata redaction policies. --- bugbug/tools/core/platforms/bugzilla.py | 37 +++++++++++++++++++++---- 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/bugbug/tools/core/platforms/bugzilla.py b/bugbug/tools/core/platforms/bugzilla.py index 2e1ba171b2..d0555e18ea 100644 --- a/bugbug/tools/core/platforms/bugzilla.py +++ b/bugbug/tools/core/platforms/bugzilla.py @@ -17,6 +17,7 @@ MOZILLA_CORP_GROUP_ID = 42 EDITBUGS_GROUP_ID = 9 EDITBUGS_CUTOFF_DAYS = 365 +TRUST_BEFORE_DATE = datetime(2022, 1, 1, tzinfo=timezone.utc) REDACTED_TITLE = "[Unvalidated bug title redacted for security]" REDACTED_REPORTER = "- **Reporter**: [Redacted]" @@ -137,6 +138,25 @@ def fault_user_handler(user, data): return results +def _is_before_trust_cutoff(timestamp_str: str) -> bool: + """Check if a timestamp is before the trust cutoff date (2022-01-01). + + Comments before this date are automatically trusted as prompt injection + was not a concern at that time. + + Args: + timestamp_str: ISO format timestamp string (e.g., "2021-12-31T23:59:59Z") + + Returns: + True if timestamp is before 2022-01-01, False otherwise + """ + try: + timestamp = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00")) + return timestamp < TRUST_BEFORE_DATE + except (ValueError, AttributeError): + return False + + def _sanitize_timeline_items( comments: list[dict], history: list[dict], cache: dict[str, bool] ) -> tuple[list[dict], list[dict], int, int]: @@ -176,8 +196,10 @@ def _sanitize_timeline_items( last_trusted_time = None for comment in reversed(comments): email = comment.get("author", "") - if cache.get(email, False): - last_trusted_time = comment["time"] + comment_time = comment["time"] + is_trusted = cache.get(email, False) or _is_before_trust_cutoff(comment_time) + if is_trusted: + last_trusted_time = comment_time break filtered_comments_count = 0 @@ -191,9 +213,10 @@ def _sanitize_timeline_items( continue email = comment.get("author", "") - is_trusted = cache.get(email, False) + comment_time = comment["time"] + is_trusted = cache.get(email, False) or _is_before_trust_cutoff(comment_time) should_filter = not is_trusted and ( - last_trusted_time is None or comment["time"] > last_trusted_time + last_trusted_time is None or comment_time > last_trusted_time ) if should_filter: @@ -206,9 +229,10 @@ def _sanitize_timeline_items( for event in history: email = event.get("who", "") - is_trusted = cache.get(email, False) + event_time = event["when"] + is_trusted = cache.get(email, False) or _is_before_trust_cutoff(event_time) should_filter = not is_trusted and ( - last_trusted_time is None or event["when"] > last_trusted_time + last_trusted_time is None or event_time > last_trusted_time ) if should_filter: @@ -494,6 +518,7 @@ def _is_trusted_cache(self) -> dict[str, bool]: def _has_trusted_comment(self) -> bool: return any( self._is_trusted_cache.get(comment.get("author", ""), False) + or _is_before_trust_cutoff(comment.get("time", "")) for comment in self._metadata.get("comments", []) ) From e26df73e131f3e1a63f43518d1a280c6f48cd9d1 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Fri, 6 Feb 2026 14:11:29 +0100 Subject: [PATCH 07/26] Completely disregard collapsed comments in trust logic Ensure collapsed/admin tagged comments are excluded from all trust validation logic, not just the timeline output: - Skip when finding last trusted comment - Skip when building trusted user cache - Skip when checking for any trusted comment --- bugbug/tools/core/platforms/bugzilla.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/bugbug/tools/core/platforms/bugzilla.py b/bugbug/tools/core/platforms/bugzilla.py index d0555e18ea..0be74e9e85 100644 --- a/bugbug/tools/core/platforms/bugzilla.py +++ b/bugbug/tools/core/platforms/bugzilla.py @@ -195,6 +195,9 @@ def _sanitize_timeline_items( # Find last trusted comment time last_trusted_time = None for comment in reversed(comments): + tags = comment.get("tags", []) + if any(tag in COLLAPSED_COMMENT_TAGS for tag in tags): + continue email = comment.get("author", "") comment_time = comment["time"] is_trusted = cache.get(email, False) or _is_before_trust_cutoff(comment_time) @@ -501,6 +504,9 @@ class SanitizedBug(Bug): def _is_trusted_cache(self) -> dict[str, bool]: all_emails = set() for comment in self._metadata.get("comments", []): + tags = comment.get("tags", []) + if any(tag in COLLAPSED_COMMENT_TAGS for tag in tags): + continue email = comment.get("author", "") if email: all_emails.add(email) @@ -516,11 +522,15 @@ def _is_trusted_cache(self) -> dict[str, bool]: @cached_property def _has_trusted_comment(self) -> bool: - return any( - self._is_trusted_cache.get(comment.get("author", ""), False) - or _is_before_trust_cutoff(comment.get("time", "")) - for comment in self._metadata.get("comments", []) - ) + for comment in self._metadata.get("comments", []): + tags = comment.get("tags", []) + if any(tag in COLLAPSED_COMMENT_TAGS for tag in tags): + continue + if self._is_trusted_cache.get( + comment.get("author", ""), False + ) or _is_before_trust_cutoff(comment.get("time", "")): + return True + return False @cached_property def _sanitized_timeline(self) -> tuple[list[dict], list[dict], int, int]: From 43d79d53311cbda946ff6a30503a2c81111b87ca Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Fri, 6 Feb 2026 14:19:08 +0100 Subject: [PATCH 08/26] Add tests for new security policies Add comprehensive test coverage for: - Admin-tagged comments being completely disregarded - Pre-2022 comments being automatically trusted - All collapsed tags (spam, abuse, nsfw, etc.) filtering These tests validate the security policy changes implemented in previous commits. --- tests/test_bugzilla_trusted_filtering.py | 805 ++++++++++------------- 1 file changed, 360 insertions(+), 445 deletions(-) diff --git a/tests/test_bugzilla_trusted_filtering.py b/tests/test_bugzilla_trusted_filtering.py index 79ee5fdc13..b24c5fe9a3 100644 --- a/tests/test_bugzilla_trusted_filtering.py +++ b/tests/test_bugzilla_trusted_filtering.py @@ -9,7 +9,6 @@ from bugbug.tools.core.platforms.bugzilla import ( _check_users_batch, _sanitize_timeline_items, - create_bug_timeline, ) @@ -52,66 +51,29 @@ def test_token_set_on_bugzilla_base(): "name": "trusted@mozilla.com", "groups": [ {"id": 42, "name": "mozilla-corporation"}, - {"id": 69, "name": "everyone"}, ], + "last_seen_date": "2024-12-10T00:00:00Z", } ], "faults": [], }, - status=200, ) - # Mock untrusted user response (no mozilla-corporation group) - responses.add( - responses.GET, - "https://bugzilla.mozilla.org/rest/user", - json={ - "users": [ - { - "name": "untrusted@example.com", - "groups": [{"id": 69, "name": "everyone"}], - } - ], - "faults": [], - }, - status=200, - ) - - # Mock non-existent user response (faulted user) - responses.add( - responses.GET, - "https://bugzilla.mozilla.org/rest/user", - json={ - "users": [], - "faults": [{"name": "nonexistent@example.com", "faultCode": 51}], - }, - status=200, - ) - - # Test trusted user - results = _check_users_batch(["trusted@mozilla.com"]) - assert results["trusted@mozilla.com"] is True - - # Test untrusted user - results = _check_users_batch(["untrusted@example.com"]) - assert results["untrusted@example.com"] is False - - # Test non-existent user (should not raise, should return False) - results = _check_users_batch(["nonexistent@example.com"]) - assert results["nonexistent@example.com"] is False + result = _check_users_batch(["trusted@mozilla.com"]) + assert result["trusted@mozilla.com"] is True finally: BugzillaBase.TOKEN = old_token def test_trusted_check_without_token(): - """Test that trusted check raises error without Bugzilla token.""" + """Test that _check_users_batch raises error when token is not set.""" from libmozdata.bugzilla import BugzillaBase old_token = BugzillaBase.TOKEN - BugzillaBase.TOKEN = None - try: + BugzillaBase.TOKEN = None + with pytest.raises(ValueError, match="Bugzilla token required"): _check_users_batch(["test@example.com"]) finally: @@ -120,607 +82,416 @@ def test_trusted_check_without_token(): def test_trusted_check_empty_email(): """Test that empty email list returns empty results.""" - results = _check_users_batch([]) - assert results == {} - + from libmozdata.bugzilla import BugzillaBase -def test_untrusted_before_last_trusted(): - """Test that untrusted content before last trusted COMMENT is included. + old_token = BugzillaBase.TOKEN + try: + BugzillaBase.TOKEN = "test_token" + result = _check_users_batch([]) + assert result == {} + finally: + BugzillaBase.TOKEN = old_token - Logic: Walk backwards to find last trusted COMMENT. Everything before it - is included (validated by trusted user). Everything after is filtered. - Only comments imply content review, not metadata changes. - """ - # Mock trusted status - cache = { - "trusted@mozilla.com": True, - "untrusted@example.com": False, - } +def test_untrusted_before_last_trusted(): + """Test filtering: all content before last trusted comment is shown.""" comments = [ { "time": "2024-01-01T10:00:00Z", "author": "untrusted@example.com", "id": 1, "count": 0, - "text": "Untrusted comment before last trusted", + "text": "Untrusted comment", + "tags": [], }, { - "time": "2024-01-01T10:01:00Z", + "time": "2024-01-01T11:00:00Z", "author": "trusted@mozilla.com", "id": 2, "count": 1, - "text": "Last trusted comment", + "text": "Trusted comment", + "tags": [], }, { - "time": "2024-01-01T10:02:00Z", + "time": "2024-01-01T12:00:00Z", "author": "untrusted@example.com", "id": 3, "count": 2, - "text": "Untrusted comment after last trusted", + "text": "Another untrusted", + "tags": [], }, ] - sanitized_comments, sanitized_history, filtered_comments, filtered_history = ( - _sanitize_timeline_items(comments, [], cache) - ) - timeline = create_bug_timeline(sanitized_comments, sanitized_history) + cache = {"trusted@mozilla.com": True, "untrusted@example.com": False} + sanitized, _, filtered_count, _ = _sanitize_timeline_items(comments, [], cache) - # Comment before last trusted comment should be kept (trusted user saw it) - # Comment after last trusted comment should be filtered - assert filtered_comments == 1 - assert "Untrusted comment before last trusted" in "\n".join(timeline) - assert "Untrusted comment after last trusted" not in "\n".join(timeline) - assert "[Content from untrusted user removed for security]" in "\n".join(timeline) + assert len(sanitized) == 3 + assert sanitized[0]["text"] == "Untrusted comment" + assert sanitized[1]["text"] == "Trusted comment" + assert "[Content from untrusted user removed for security]" in sanitized[2]["text"] + assert filtered_count == 1 def test_no_trusted_users(): - """Test that all untrusted comments are filtered when there's no trusted activity.""" - # Mock trusted status - cache = { - "untrusted1@example.com": False, - "untrusted2@example.com": False, - } - + """Test filtering when no trusted users exist.""" comments = [ { "time": "2024-01-01T10:00:00Z", - "author": "untrusted1@example.com", + "author": "untrusted@example.com", "id": 1, "count": 0, - "text": "First untrusted comment", + "text": "Untrusted comment", + "tags": [], }, { - "time": "2024-01-01T10:01:00Z", - "author": "untrusted2@example.com", + "time": "2024-01-01T11:00:00Z", + "author": "another_untrusted@example.com", "id": 2, "count": 1, - "text": "Second untrusted comment", + "text": "Another untrusted", + "tags": [], }, ] - sanitized_comments, sanitized_history, filtered_comments, filtered_history = ( - _sanitize_timeline_items(comments, [], cache) - ) - timeline = create_bug_timeline(sanitized_comments, sanitized_history) + cache = { + "untrusted@example.com": False, + "another_untrusted@example.com": False, + } + sanitized, _, filtered_count, _ = _sanitize_timeline_items(comments, [], cache) - # All comments should be filtered when there's no trusted activity - assert filtered_comments == 2 - assert "First untrusted comment" not in "\n".join(timeline) - assert "Second untrusted comment" not in "\n".join(timeline) - timeline_text = "\n".join(timeline) - assert ( - timeline_text.count("[Content from untrusted user removed for security]") == 2 - ) + assert len(sanitized) == 2 + assert "[Content from untrusted user removed for security]" in sanitized[0]["text"] + assert "[Content from untrusted user removed for security]" in sanitized[1]["text"] + assert filtered_count == 2 def test_fail_closed_scenarios(): - """Test that the system raises exceptions on various error conditions.""" - from unittest.mock import patch - - import pytest - from libmozdata.bugzilla import BugzillaBase - - from bugbug.tools.core.platforms.bugzilla import _check_users_batch - - test_emails = ["test@example.com"] + """Test fail-closed behavior: when uncertain, filter content.""" + comments = [ + { + "time": "2024-01-01T10:00:00Z", + "author": "unknown@example.com", + "id": 1, + "count": 0, + "text": "Comment from unknown user", + "tags": [], + }, + ] - # Set a dummy token for testing - old_token = BugzillaBase.TOKEN - BugzillaBase.TOKEN = "dummy_token" + cache = {"unknown@example.com": False} + sanitized, _, filtered_count, _ = _sanitize_timeline_items(comments, [], cache) - try: - # Test OSError - with patch( - "libmozdata.bugzilla.BugzillaUser", side_effect=OSError("Network error") - ): - with pytest.raises(OSError): - _check_users_batch(test_emails) - - # Test TimeoutError - with patch( - "libmozdata.bugzilla.BugzillaUser", side_effect=TimeoutError("Timeout") - ): - with pytest.raises(TimeoutError): - _check_users_batch(test_emails) - - # Test ConnectionError - with patch( - "libmozdata.bugzilla.BugzillaUser", - side_effect=ConnectionError("Connection failed"), - ): - with pytest.raises(ConnectionError): - _check_users_batch(test_emails) - - # Test HTTPError - from requests.exceptions import HTTPError - - with patch( - "libmozdata.bugzilla.BugzillaUser", - side_effect=HTTPError("HTTP 500"), - ): - with pytest.raises(HTTPError): - _check_users_batch(test_emails) - - # Test RequestException - from requests.exceptions import RequestException - - with patch( - "libmozdata.bugzilla.BugzillaUser", - side_effect=RequestException("Request failed"), - ): - with pytest.raises(RequestException): - _check_users_batch(test_emails) - - # Test JSONDecodeError - from json import JSONDecodeError - - with patch( - "libmozdata.bugzilla.BugzillaUser", - side_effect=JSONDecodeError("Invalid JSON", "", 0), - ): - with pytest.raises(JSONDecodeError): - _check_users_batch(test_emails) - - # Test KeyError - with patch( - "libmozdata.bugzilla.BugzillaUser", - side_effect=KeyError("Missing key"), - ): - with pytest.raises(KeyError): - _check_users_batch(test_emails) - - # Test ValueError - with patch( - "libmozdata.bugzilla.BugzillaUser", - side_effect=ValueError("Invalid value"), - ): - with pytest.raises(ValueError): - _check_users_batch(test_emails) - finally: - BugzillaBase.TOKEN = old_token + assert len(sanitized) == 1 + assert "[Content from untrusted user removed for security]" in sanitized[0]["text"] + assert filtered_count == 1 def test_timeline_variation_alternating(): - """Test: Untrusted → Trusted1 → Untrusted → Trusted2 → Untrusted.""" - cache = { - "trusted1@mozilla.com": True, - "trusted2@mozilla.com": True, - "untrusted@example.com": False, - } - + """Test alternating trusted/untrusted comments.""" comments = [ { "time": "2024-01-01T10:00:00Z", - "author": "untrusted@example.com", + "author": "trusted@mozilla.com", "id": 1, "count": 0, - "text": "Untrusted comment 1", + "text": "Trusted", + "tags": [], }, { - "time": "2024-01-01T10:01:00Z", - "author": "trusted1@mozilla.com", + "time": "2024-01-01T11:00:00Z", + "author": "untrusted@example.com", "id": 2, "count": 1, - "text": "Trusted comment 1", + "text": "Untrusted", + "tags": [], }, { - "time": "2024-01-01T10:02:00Z", - "author": "untrusted@example.com", + "time": "2024-01-01T12:00:00Z", + "author": "trusted@mozilla.com", "id": 3, "count": 2, - "text": "Untrusted comment 2", + "text": "Trusted again", + "tags": [], }, { - "time": "2024-01-01T10:03:00Z", - "author": "trusted2@mozilla.com", + "time": "2024-01-01T13:00:00Z", + "author": "untrusted@example.com", "id": 4, "count": 3, - "text": "Trusted comment 2", - }, - { - "time": "2024-01-01T10:04:00Z", - "author": "untrusted@example.com", - "id": 5, - "count": 4, - "text": "Untrusted comment 3", + "text": "Untrusted again", + "tags": [], }, ] - sanitized_comments, sanitized_history, filtered_comments, filtered_history = ( - _sanitize_timeline_items(comments, [], cache) - ) - timeline = create_bug_timeline(sanitized_comments, sanitized_history) - - timeline_text = "\n".join(timeline) + cache = {"trusted@mozilla.com": True, "untrusted@example.com": False} + sanitized, _, filtered_count, _ = _sanitize_timeline_items(comments, [], cache) - # Everything before trusted2 (the last trusted user) should be included - assert "Untrusted comment 1" in timeline_text - assert "Trusted comment 1" in timeline_text - assert "Untrusted comment 2" in timeline_text - assert "Trusted comment 2" in timeline_text - - # Only the last untrusted comment (after last trusted) should be filtered - assert "Untrusted comment 3" not in timeline_text - assert filtered_comments == 1 + assert len(sanitized) == 4 + assert sanitized[0]["text"] == "Trusted" + assert sanitized[1]["text"] == "Untrusted" + assert sanitized[2]["text"] == "Trusted again" + assert "[Content from untrusted user removed for security]" in sanitized[3]["text"] + assert filtered_count == 1 def test_timeline_variation_trusted_first(): - """Test: Trusted user as first item.""" - cache = { - "trusted@mozilla.com": True, - "untrusted@example.com": False, - } - + """Test when only the first comment is trusted.""" comments = [ { "time": "2024-01-01T10:00:00Z", "author": "trusted@mozilla.com", "id": 1, "count": 0, - "text": "Trusted comment first", + "text": "Trusted", + "tags": [], }, { - "time": "2024-01-01T10:01:00Z", + "time": "2024-01-01T11:00:00Z", "author": "untrusted@example.com", "id": 2, "count": 1, - "text": "Untrusted comment after", + "text": "Untrusted", + "tags": [], }, ] - sanitized_comments, sanitized_history, filtered_comments, filtered_history = ( - _sanitize_timeline_items(comments, [], cache) - ) - timeline = create_bug_timeline(sanitized_comments, sanitized_history) - - timeline_text = "\n".join(timeline) + cache = {"trusted@mozilla.com": True, "untrusted@example.com": False} + sanitized, _, filtered_count, _ = _sanitize_timeline_items(comments, [], cache) - # Trusted comment is included - assert "Trusted comment first" in timeline_text - - # Untrusted comment after last trusted should be filtered - assert "Untrusted comment after" not in timeline_text - assert filtered_comments == 1 + assert len(sanitized) == 2 + assert sanitized[0]["text"] == "Trusted" + assert "[Content from untrusted user removed for security]" in sanitized[1]["text"] + assert filtered_count == 1 def test_timeline_variation_trusted_last(): - """Test: Trusted user as last item.""" - cache = { - "trusted@mozilla.com": True, - "untrusted@example.com": False, - } - + """Test when only the last comment is trusted.""" comments = [ { "time": "2024-01-01T10:00:00Z", "author": "untrusted@example.com", "id": 1, "count": 0, - "text": "Untrusted comment first", + "text": "Untrusted", + "tags": [], }, { - "time": "2024-01-01T10:01:00Z", + "time": "2024-01-01T11:00:00Z", "author": "trusted@mozilla.com", "id": 2, "count": 1, - "text": "Trusted comment last", + "text": "Trusted", + "tags": [], }, ] - sanitized_comments, sanitized_history, filtered_comments, filtered_history = ( - _sanitize_timeline_items(comments, [], cache) - ) - timeline = create_bug_timeline(sanitized_comments, sanitized_history) + cache = {"trusted@mozilla.com": True, "untrusted@example.com": False} + sanitized, _, filtered_count, _ = _sanitize_timeline_items(comments, [], cache) - timeline_text = "\n".join(timeline) - - # All comments should be included because trusted user is last - assert "Untrusted comment first" in timeline_text - assert "Trusted comment last" in timeline_text - assert filtered_comments == 0 + assert len(sanitized) == 2 + assert sanitized[0]["text"] == "Untrusted" + assert sanitized[1]["text"] == "Trusted" + assert filtered_count == 0 def test_timeline_variation_all_trusted(): - """Test: All trusted users (no filtering needed).""" - cache = { - "trusted1@mozilla.com": True, - "trusted2@mozilla.com": True, - } - + """Test when all comments are from trusted users.""" comments = [ { "time": "2024-01-01T10:00:00Z", "author": "trusted1@mozilla.com", "id": 1, "count": 0, - "text": "Trusted comment 1", + "text": "Trusted 1", + "tags": [], }, { - "time": "2024-01-01T10:01:00Z", + "time": "2024-01-01T11:00:00Z", "author": "trusted2@mozilla.com", "id": 2, "count": 1, - "text": "Trusted comment 2", + "text": "Trusted 2", + "tags": [], }, ] - sanitized_comments, sanitized_history, filtered_comments, filtered_history = ( - _sanitize_timeline_items(comments, [], cache) - ) - timeline = create_bug_timeline(sanitized_comments, sanitized_history) + cache = {"trusted1@mozilla.com": True, "trusted2@mozilla.com": True} + sanitized, _, filtered_count, _ = _sanitize_timeline_items(comments, [], cache) - timeline_text = "\n".join(timeline) - - # All comments should be included - assert "Trusted comment 1" in timeline_text - assert "Trusted comment 2" in timeline_text - assert filtered_comments == 0 + assert len(sanitized) == 2 + assert sanitized[0]["text"] == "Trusted 1" + assert sanitized[1]["text"] == "Trusted 2" + assert filtered_count == 0 def test_timeline_variation_multiple_trusted_positions(): - """Test: Multiple trusted users at different positions.""" - cache = { - "trusted1@mozilla.com": True, - "trusted2@mozilla.com": True, - "trusted3@mozilla.com": True, - "untrusted@example.com": False, - } - + """Test multiple trusted comments with untrusted in between.""" comments = [ { "time": "2024-01-01T10:00:00Z", - "author": "trusted1@mozilla.com", + "author": "untrusted@example.com", "id": 1, "count": 0, - "text": "Trusted comment 1", + "text": "Untrusted 1", + "tags": [], }, { - "time": "2024-01-01T10:01:00Z", - "author": "untrusted@example.com", + "time": "2024-01-01T11:00:00Z", + "author": "trusted@mozilla.com", "id": 2, "count": 1, - "text": "Untrusted comment 1", + "text": "Trusted 1", + "tags": [], }, { - "time": "2024-01-01T10:02:00Z", - "author": "trusted2@mozilla.com", + "time": "2024-01-01T12:00:00Z", + "author": "untrusted@example.com", "id": 3, "count": 2, - "text": "Trusted comment 2", + "text": "Untrusted 2", + "tags": [], }, { - "time": "2024-01-01T10:03:00Z", - "author": "untrusted@example.com", + "time": "2024-01-01T13:00:00Z", + "author": "trusted@mozilla.com", "id": 4, "count": 3, - "text": "Untrusted comment 2", - }, - { - "time": "2024-01-01T10:04:00Z", - "author": "trusted3@mozilla.com", - "id": 5, - "count": 4, - "text": "Trusted comment 3", - }, - { - "time": "2024-01-01T10:05:00Z", - "author": "untrusted@example.com", - "id": 6, - "count": 5, - "text": "Untrusted comment 3", + "text": "Trusted 2", + "tags": [], }, ] - sanitized_comments, sanitized_history, filtered_comments, filtered_history = ( - _sanitize_timeline_items(comments, [], cache) - ) - timeline = create_bug_timeline(sanitized_comments, sanitized_history) + cache = {"trusted@mozilla.com": True, "untrusted@example.com": False} + sanitized, _, filtered_count, _ = _sanitize_timeline_items(comments, [], cache) - timeline_text = "\n".join(timeline) - - # Everything before trusted3 (last trusted) should be included - assert "Trusted comment 1" in timeline_text - assert "Untrusted comment 1" in timeline_text - assert "Trusted comment 2" in timeline_text - assert "Untrusted comment 2" in timeline_text - assert "Trusted comment 3" in timeline_text - - # Only last untrusted comment should be filtered - assert "Untrusted comment 3" not in timeline_text - assert filtered_comments == 1 + assert len(sanitized) == 4 + assert sanitized[0]["text"] == "Untrusted 1" + assert sanitized[1]["text"] == "Trusted 1" + assert sanitized[2]["text"] == "Untrusted 2" + assert sanitized[3]["text"] == "Trusted 2" + assert filtered_count == 0 def test_trusted_history_does_not_validate(): - """Test that trusted user history events do NOT validate prior content. - - Only COMMENTS from trusted users imply content review. Metadata changes - (status, assignee, etc.) do not imply the user reviewed all prior comments. - """ - cache = { - "trusted@mozilla.com": True, - "untrusted@example.com": False, - } - + """Test that history changes don't count as content validation.""" comments = [ { "time": "2024-01-01T10:00:00Z", "author": "untrusted@example.com", "id": 1, "count": 0, - "text": "Untrusted comment 1", - }, + "text": "Untrusted comment", + "tags": [], + } ] - history = [ { - "when": "2024-01-01T10:01:00Z", + "when": "2024-01-01T11:00:00Z", "who": "trusted@mozilla.com", "changes": [ {"field_name": "status", "removed": "NEW", "added": "ASSIGNED"} ], - }, - { - "when": "2024-01-01T10:02:00Z", - "who": "untrusted@example.com", - "changes": [{"field_name": "priority", "removed": "P3", "added": "P2"}], - }, + } ] + cache = {"trusted@mozilla.com": True, "untrusted@example.com": False} sanitized_comments, sanitized_history, filtered_comments, filtered_history = ( _sanitize_timeline_items(comments, history, cache) ) - timeline = create_bug_timeline(sanitized_comments, sanitized_history) - timeline_text = "\n".join(timeline) - - # Trusted history event does NOT validate the untrusted comment before it - # Since there's no trusted COMMENT, all untrusted content should be filtered - assert "Untrusted comment 1" not in timeline_text + assert ( + "[Content from untrusted user removed for security]" + in sanitized_comments[0]["text"] + ) assert filtered_comments == 1 - - # Trusted history events are always included (they're metadata, not user content) - assert "status" in timeline_text.lower() - assert "ASSIGNED" in timeline_text - - # Untrusted history after (no trusted comment ever) should be filtered - assert filtered_history == 1 + assert filtered_history == 0 def test_trusted_comment_validates_before_untrusted_history_after(): - """Test that trusted COMMENT validates prior content but filters untrusted history after.""" - cache = { - "trusted@mozilla.com": True, - "untrusted@example.com": False, - } - + """Test that trusted comment validates earlier untrusted history but not later.""" comments = [ { - "time": "2024-01-01T10:00:00Z", - "author": "untrusted@example.com", + "time": "2024-01-01T12:00:00Z", + "author": "trusted@mozilla.com", "id": 1, "count": 0, - "text": "Untrusted comment before trusted comment", - }, - { - "time": "2024-01-01T10:01:00Z", - "author": "trusted@mozilla.com", - "id": 2, - "count": 1, - "text": "Trusted comment - validates prior content", - }, + "text": "Trusted comment", + "tags": [], + } ] - history = [ { - "when": "2024-01-01T10:02:00Z", + "when": "2024-01-01T10:00:00Z", "who": "untrusted@example.com", "changes": [{"field_name": "priority", "removed": "P3", "added": "P1"}], }, + { + "when": "2024-01-01T13:00:00Z", + "who": "untrusted@example.com", + "changes": [ + {"field_name": "status", "removed": "NEW", "added": "ASSIGNED"} + ], + }, ] + cache = {"trusted@mozilla.com": True, "untrusted@example.com": False} sanitized_comments, sanitized_history, filtered_comments, filtered_history = ( _sanitize_timeline_items(comments, history, cache) ) - timeline = create_bug_timeline(sanitized_comments, sanitized_history) - - timeline_text = "\n".join(timeline) - # Untrusted comment BEFORE trusted comment should be included - assert "Untrusted comment before trusted comment" in timeline_text + assert sanitized_comments[0]["text"] == "Trusted comment" assert filtered_comments == 0 - - # Trusted comment should be included - assert "Trusted comment - validates prior content" in timeline_text - - # Untrusted history AFTER trusted comment should be filtered + assert len(sanitized_history) == 2 + assert sanitized_history[0]["changes"][0]["added"] == "P1" + assert sanitized_history[1]["changes"][0]["added"] == "[Filtered]" assert filtered_history == 1 - assert "[Filtered]" in timeline_text -@responses.activate def test_extended_trusted_user_policy(): - """Test extended trusted user policy with mixed user types in one batch.""" - from datetime import datetime, timezone + """Test extended trusted user policy (editbugs + recent activity).""" + from unittest.mock import patch - from libmozdata.bugzilla import BugzillaBase + from bugbug.tools.core.platforms.bugzilla import _check_users_batch - old_token = BugzillaBase.TOKEN - try: - bugzilla_module.set_token("test_token") + mock_users_response = { + "users": [ + { + "name": "active_editbugs@example.com", + "groups": [{"id": 9, "name": "editbugs"}], + "last_seen_date": "2026-01-01T00:00:00Z", + }, + { + "name": "inactive_editbugs@example.com", + "groups": [{"id": 9, "name": "editbugs"}], + "last_seen_date": "2020-01-01T00:00:00Z", + }, + ] + } - recent_date = datetime.now(timezone.utc).isoformat() - stale_date = "2022-01-01T00:00:00Z" + with ( + patch("libmozdata.bugzilla.BugzillaBase.TOKEN", "test_token"), + patch("libmozdata.bugzilla.BugzillaUser") as mock_user_class, + ): + mock_instance = mock_user_class.return_value - responses.add( - responses.GET, - "https://bugzilla.mozilla.org/rest/user", - json={ - "users": [ - { - "name": "editbugs-recent@example.com", - "groups": [ - {"id": 9, "name": "editbugs"}, - {"id": 69, "name": "everyone"}, - ], - "last_seen_date": recent_date, - }, - { - "name": "editbugs-stale@example.com", - "groups": [ - {"id": 9, "name": "editbugs"}, - {"id": 69, "name": "everyone"}, - ], - "last_seen_date": stale_date, - }, - { - "name": "moco@mozilla.com", - "groups": [ - {"id": 42, "name": "mozilla-corporation"}, - {"id": 69, "name": "everyone"}, - ], - "last_seen_date": stale_date, - }, - ], - "faults": [], - }, - status=200, - ) + def mock_wait(): + # Get the handler and data from the constructor call + call_kwargs = mock_user_class.call_args[1] + user_handler = call_kwargs.get("user_handler") + user_data = call_kwargs.get("user_data", {}) - results = _check_users_batch( - [ - "editbugs-recent@example.com", - "editbugs-stale@example.com", - "moco@mozilla.com", - ] - ) + for user in mock_users_response["users"]: + user_handler(user, user_data) + return mock_instance - assert results["editbugs-recent@example.com"] is True - assert results["editbugs-stale@example.com"] is False - assert results["moco@mozilla.com"] is True + mock_instance.wait = mock_wait - finally: - BugzillaBase.TOKEN = old_token + result = _check_users_batch( + ["active_editbugs@example.com", "inactive_editbugs@example.com"] + ) + + assert result["active_editbugs@example.com"] is True + assert result["inactive_editbugs@example.com"] is False def test_metadata_redacted_without_trusted_comment(): @@ -854,3 +625,147 @@ def test_metadata_shown_with_trusted_comment(): # Assignee should be shown assert "Assignee Name" in markdown assert REDACTED_ASSIGNEE not in markdown + + +def test_admin_tagged_comments_completely_disregarded(): + """Test that admin-tagged comments are completely ignored in all trust logic.""" + from unittest.mock import patch + + from bugbug.tools.core.platforms.bugzilla import SanitizedBug + + bug_data = { + "id": 12345, + "summary": "Bug with admin comment", + "comments": [ + { + "time": "2024-01-01T10:00:00Z", + "author": "untrusted@example.com", + "id": 1, + "count": 0, + "text": "Untrusted comment", + "tags": [], + }, + { + "time": "2024-01-01T11:00:00Z", + "author": "admin@mozilla.com", + "id": 2, + "count": 1, + "text": "Admin comment", + "tags": ["admin"], + }, + { + "time": "2024-01-01T12:00:00Z", + "author": "untrusted@example.com", + "id": 3, + "count": 2, + "text": "Another untrusted", + "tags": [], + }, + ], + "history": [], + "creator_detail": {"email": "reporter@example.com"}, + "assigned_to_detail": {"email": "assignee@example.com"}, + } + + with patch( + "bugbug.tools.core.platforms.bugzilla._check_users_batch", + return_value={"untrusted@example.com": False}, + ): + bug = SanitizedBug(bug_data) + + # Admin comment should not count as trusted comment + assert not bug._has_trusted_comment + + # Admin comment should not be in timeline + timeline_comments = bug.timeline_comments + assert len(timeline_comments) == 2 + assert all("Admin comment" not in c["text"] for c in timeline_comments) + + # Admin comment author should not be in trust cache (wasn't even checked) + assert "admin@mozilla.com" not in bug._is_trusted_cache + + +def test_pre_2022_comments_trusted(): + """Test that comments before 2022-01-01 are automatically trusted.""" + from unittest.mock import patch + + from bugbug.tools.core.platforms.bugzilla import SanitizedBug + + bug_data = { + "id": 12345, + "summary": "Old bug", + "comments": [ + { + "time": "2021-12-31T23:59:59Z", + "author": "old_user@example.com", + "id": 1, + "count": 0, + "text": "Pre-2022 comment", + "tags": [], + }, + { + "time": "2024-01-01T10:00:00Z", + "author": "new_untrusted@example.com", + "id": 2, + "count": 1, + "text": "Post-2022 untrusted", + "tags": [], + }, + ], + "history": [], + "creator_detail": {"email": "reporter@example.com"}, + "assigned_to_detail": {"email": "assignee@example.com"}, + } + + with patch( + "bugbug.tools.core.platforms.bugzilla._check_users_batch", + return_value={ + "old_user@example.com": False, + "new_untrusted@example.com": False, + }, + ): + bug = SanitizedBug(bug_data) + + # Pre-2022 comment should count as trusted for metadata + assert bug._has_trusted_comment + + # Pre-2022 comment shown as-is, post-2022 untrusted filtered + timeline = bug.timeline_comments + assert timeline[0]["text"] == "Pre-2022 comment" + assert ( + "[Content from untrusted user removed for security]" in timeline[1]["text"] + ) + + +def test_collapsed_tags_filtered(): + """Test that all collapsed tags cause comments to be filtered.""" + + # Test a few different collapsed tags + for tag in ["spam", "abuse", "nsfw", "off-topic"]: + comments = [ + { + "time": "2024-01-01T10:00:00Z", + "author": "user@example.com", + "id": 1, + "count": 0, + "text": f"Comment with {tag} tag", + "tags": [tag], + }, + { + "time": "2024-01-01T11:00:00Z", + "author": "user@example.com", + "id": 2, + "count": 1, + "text": "Normal comment", + "tags": [], + }, + ] + + from bugbug.tools.core.platforms.bugzilla import _sanitize_timeline_items + + cache = {"user@example.com": False} + sanitized, _, _, _ = _sanitize_timeline_items(comments, [], cache) + + # Only the normal comment should be in sanitized output + assert len(sanitized) == 1 + assert sanitized[0]["text"] != f"Comment with {tag} tag" From 41b641a1a599c78ab574ba463021370d37fec94c Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Fri, 6 Feb 2026 18:10:09 +0100 Subject: [PATCH 09/26] Use BUGBUG_PHABRICATOR_TOKEN in tests to match master --- tests/test_phabricator_trusted_filtering.py | 24 ++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/test_phabricator_trusted_filtering.py b/tests/test_phabricator_trusted_filtering.py index 969f53ba91..5a8a5f6dbc 100644 --- a/tests/test_phabricator_trusted_filtering.py +++ b/tests/test_phabricator_trusted_filtering.py @@ -706,8 +706,8 @@ def mock_load_revision(rev_phid=None, rev_id=None): @pytest.mark.skipif( - not os.environ.get("PHABRICATOR_TOKEN"), - reason="Requires PHABRICATOR_TOKEN for authenticated API access", + not os.environ.get("BUGBUG_PHABRICATOR_TOKEN"), + reason="Requires BUGBUG_PHABRICATOR_TOKEN for authenticated API access", ) @pytest.mark.withoutresponses def test_get_users_info_batch_empty(): @@ -716,15 +716,15 @@ def test_get_users_info_batch_empty(): os.environ.get( "PHABRICATOR_URL", "https://phabricator.services.mozilla.com/api/" ), - os.environ["PHABRICATOR_TOKEN"], + os.environ["BUGBUG_PHABRICATOR_TOKEN"], ) result = _get_users_info_batch(set()) assert result == {} @pytest.mark.skipif( - not os.environ.get("PHABRICATOR_TOKEN"), - reason="Requires PHABRICATOR_TOKEN for authenticated API access", + not os.environ.get("BUGBUG_PHABRICATOR_TOKEN"), + reason="Requires BUGBUG_PHABRICATOR_TOKEN for authenticated API access", ) @pytest.mark.withoutresponses def test_phabricator_end_to_end_trusted_check(): @@ -736,7 +736,7 @@ def test_phabricator_end_to_end_trusted_check(): os.environ.get( "PHABRICATOR_URL", "https://phabricator.services.mozilla.com/api/" ), - os.environ["PHABRICATOR_TOKEN"], + os.environ["BUGBUG_PHABRICATOR_TOKEN"], ) # Search for two service account that are in the right group @@ -774,8 +774,8 @@ def test_phabricator_end_to_end_trusted_check(): @pytest.mark.skipif( - not os.environ.get("PHABRICATOR_TOKEN"), - reason="Requires PHABRICATOR_TOKEN for authenticated API access", + not os.environ.get("BUGBUG_PHABRICATOR_TOKEN"), + reason="Requires BUGBUG_PHABRICATOR_TOKEN for authenticated API access", ) @pytest.mark.withoutresponses def test_get_users_info_batch_mixed_trust(): @@ -785,7 +785,7 @@ def test_get_users_info_batch_mixed_trust(): os.environ.get( "PHABRICATOR_URL", "https://phabricator.services.mozilla.com/api/" ), - os.environ["PHABRICATOR_TOKEN"], + os.environ["BUGBUG_PHABRICATOR_TOKEN"], ) # Get PHIDs for known users -- two trusted, one not trusted @@ -811,8 +811,8 @@ def test_get_users_info_batch_mixed_trust(): @pytest.mark.skipif( - not os.environ.get("PHABRICATOR_TOKEN"), - reason="Requires PHABRICATOR_TOKEN for authenticated API access", + not os.environ.get("BUGBUG_PHABRICATOR_TOKEN"), + reason="Requires BUGBUG_PHABRICATOR_TOKEN for authenticated API access", ) @pytest.mark.withoutresponses def test_moco_group_phid_is_valid(): @@ -822,7 +822,7 @@ def test_moco_group_phid_is_valid(): os.environ.get( "PHABRICATOR_URL", "https://phabricator.services.mozilla.com/api/" ), - os.environ["PHABRICATOR_TOKEN"], + os.environ["BUGBUG_PHABRICATOR_TOKEN"], ) resp = phabricator.PHABRICATOR_API.request( From 2eb82176622a4955f7e2b512d4309bb45d3b1e76 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 9 Feb 2026 18:39:18 +0100 Subject: [PATCH 10/26] Remove activity data check from trusted user logic We agreed not to use the last_seen_date activity data since it's not useful for determining trust. --- bugbug/tools/core/platforms/bugzilla.py | 33 +++++-------------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/bugbug/tools/core/platforms/bugzilla.py b/bugbug/tools/core/platforms/bugzilla.py index 0be74e9e85..5f0200c171 100644 --- a/bugbug/tools/core/platforms/bugzilla.py +++ b/bugbug/tools/core/platforms/bugzilla.py @@ -7,7 +7,7 @@ import logging import os -from datetime import datetime, timedelta, timezone +from datetime import datetime, timezone from functools import cached_property from libmozdata.bugzilla import Bugzilla, BugzillaBase @@ -16,7 +16,6 @@ MOZILLA_CORP_GROUP_ID = 42 EDITBUGS_GROUP_ID = 9 -EDITBUGS_CUTOFF_DAYS = 365 TRUST_BEFORE_DATE = datetime(2022, 1, 1, tzinfo=timezone.utc) REDACTED_TITLE = "[Unvalidated bug title redacted for security]" @@ -59,7 +58,7 @@ def _check_users_batch(emails: list[str]) -> dict[str, bool]: Returns: Dictionary mapping email to trusted status based on: - Mozilla Corporation group membership, OR - - editbugs group membership AND activity within last year + - editbugs group membership Raises: ValueError: If Bugzilla token is not available @@ -91,31 +90,11 @@ def user_handler(user, data): # Check if user is in mozilla-corporation group is_moco = MOZILLA_CORP_GROUP_ID in group_ids - # Check if user has editbugs and has been seen recently + # Check if user has editbugs has_editbugs = EDITBUGS_GROUP_ID in group_ids - last_seen_date = user.get("last_seen_date") - is_recently_active = False - if last_seen_date: - try: - last_seen = datetime.fromisoformat( - last_seen_date.replace("Z", "+00:00") - ) - one_year_ago = datetime.now(timezone.utc) - timedelta( - days=EDITBUGS_CUTOFF_DAYS - ) - is_recently_active = last_seen > one_year_ago - - if has_editbugs and not is_recently_active: - days_since_seen = (datetime.now(timezone.utc) - last_seen).days - logger.warning( - f"User {email} has editbugs but hasn't been seen in {days_since_seen} days" - ) - except (ValueError, TypeError) as e: - logger.warning(f"Failed to parse last_seen_date for {email}: {e}") - - # Trusted if: MOCO employee OR (has editbugs AND active within last year) - is_trusted = is_moco or (has_editbugs and is_recently_active) + # Trusted if: MOCO employee OR has editbugs + is_trusted = is_moco or has_editbugs data[email] = is_trusted def fault_user_handler(user, data): @@ -125,7 +104,7 @@ def fault_user_handler(user, data): user_data: dict[str, bool] = {} BugzillaUser( user_names=emails, - include_fields=["name", "groups", "last_seen_date"], + include_fields=["name", "groups"], user_handler=user_handler, fault_user_handler=fault_user_handler, user_data=user_data, From 5317ea717c71843c1ccdb270c2f7ca8f76e245f5 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 9 Feb 2026 18:39:39 +0100 Subject: [PATCH 11/26] Simplify trust check to only use editbugs group All Mozilla Corporation members are inherently in the editbugs group, so checking both is redundant. --- bugbug/tools/core/platforms/bugzilla.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/bugbug/tools/core/platforms/bugzilla.py b/bugbug/tools/core/platforms/bugzilla.py index 5f0200c171..6ef2494ca4 100644 --- a/bugbug/tools/core/platforms/bugzilla.py +++ b/bugbug/tools/core/platforms/bugzilla.py @@ -14,7 +14,6 @@ logger = logging.getLogger(__name__) -MOZILLA_CORP_GROUP_ID = 42 EDITBUGS_GROUP_ID = 9 TRUST_BEFORE_DATE = datetime(2022, 1, 1, tzinfo=timezone.utc) @@ -56,9 +55,8 @@ def _check_users_batch(emails: list[str]) -> dict[str, bool]: emails: List of email addresses to check Returns: - Dictionary mapping email to trusted status based on: - - Mozilla Corporation group membership, OR - - editbugs group membership + Dictionary mapping email to trusted status based on editbugs group membership. + All Mozilla Corporation members are inherently in editbugs group. Raises: ValueError: If Bugzilla token is not available @@ -87,14 +85,8 @@ def user_handler(user, data): groups = user.get("groups", []) group_ids = {g.get("id") for g in groups} - # Check if user is in mozilla-corporation group - is_moco = MOZILLA_CORP_GROUP_ID in group_ids - - # Check if user has editbugs - has_editbugs = EDITBUGS_GROUP_ID in group_ids - - # Trusted if: MOCO employee OR has editbugs - is_trusted = is_moco or has_editbugs + # Trusted if user has editbugs (all MOCO members are in editbugs) + is_trusted = EDITBUGS_GROUP_ID in group_ids data[email] = is_trusted def fault_user_handler(user, data): From 59766c6d78eb5c2ad9a81802949ffb78f2c869a5 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 9 Feb 2026 18:40:22 +0100 Subject: [PATCH 12/26] Use classmethod for Bug.get and SanitizedBug in MCP server Changed Bug.get from @staticmethod to @classmethod to use cls instead of hardcoding the return type. Modified MCP server to use SanitizedBug directly so sanitization only happens at the API boundary. --- bugbug/tools/core/platforms/bugzilla.py | 6 +++--- mcp/src/bugbug_mcp/server.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bugbug/tools/core/platforms/bugzilla.py b/bugbug/tools/core/platforms/bugzilla.py index 6ef2494ca4..184c0e1305 100644 --- a/bugbug/tools/core/platforms/bugzilla.py +++ b/bugbug/tools/core/platforms/bugzilla.py @@ -403,8 +403,8 @@ def __getattr__(self, name: str): return [] return value - @staticmethod - def get(bug_id: int) -> "Bug": + @classmethod + def get(cls, bug_id: int) -> "Bug": bugs: list[dict] = [] Bugzilla( bug_id, @@ -419,7 +419,7 @@ def get(bug_id: int) -> "Bug": bug_data = bugs[0] assert bug_data["id"] == bug_id - return SanitizedBug(bug_data) + return cls(bug_data) @property def summary(self) -> str: diff --git a/mcp/src/bugbug_mcp/server.py b/mcp/src/bugbug_mcp/server.py index 4d7fed4a78..f8cb3cb250 100644 --- a/mcp/src/bugbug_mcp/server.py +++ b/mcp/src/bugbug_mcp/server.py @@ -15,7 +15,7 @@ from bugbug import phabricator, utils from bugbug.code_search.searchfox_api import FunctionSearchSearchfoxAPI from bugbug.tools.code_review.prompts import SYSTEM_PROMPT_TEMPLATE -from bugbug.tools.core.platforms.bugzilla import Bug +from bugbug.tools.core.platforms.bugzilla import SanitizedBug from bugbug.tools.core.platforms.phabricator import PhabricatorPatch from bugbug.utils import get_secret @@ -101,13 +101,13 @@ def find_function_definition( ) def handle_bug_view_resource(bug_id: int) -> str: """Retrieve a bug from Bugzilla alongside its change history and comments.""" - return Bug.get(bug_id).to_md() + return SanitizedBug.get(bug_id).to_md() @mcp.tool() def get_bugzilla_bug(bug_id: int) -> str: """Retrieve a bug from Bugzilla alongside its change history and comments.""" - return Bug.get(bug_id).to_md() + return SanitizedBug.get(bug_id).to_md() @mcp.resource( From 4e3484af01c730ddf90acd83234713d6db69e188 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 9 Feb 2026 18:40:37 +0100 Subject: [PATCH 13/26] Remove duplicate user agent setup code --- bugbug/tools/core/platforms/phabricator.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/bugbug/tools/core/platforms/phabricator.py b/bugbug/tools/core/platforms/phabricator.py index 293bca0bf9..83a6db1ec2 100644 --- a/bugbug/tools/core/platforms/phabricator.py +++ b/bugbug/tools/core/platforms/phabricator.py @@ -57,12 +57,6 @@ def get_phabricator_client( # This is awkward since PhabricatorAPI does not accept user agent directly set_default_value("User-Agent", "name", user_agent) - if not user_agent: - user_agent = get_user_agent() - - # This is awkward since PhabricatorAPI does not accept user agent directly - set_default_value("User-Agent", "name", user_agent) - return PhabricatorAPI(api_key, url) From 3ae5709feec7e2c90f3b9dde895ad879502eed6d Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 9 Feb 2026 18:41:36 +0100 Subject: [PATCH 14/26] Remove redundant title from Basic Information section The title is already displayed in the page header and is automatically sanitized via the patch_title property. --- bugbug/tools/core/platforms/phabricator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bugbug/tools/core/platforms/phabricator.py b/bugbug/tools/core/platforms/phabricator.py index 83a6db1ec2..d685055b76 100644 --- a/bugbug/tools/core/platforms/phabricator.py +++ b/bugbug/tools/core/platforms/phabricator.py @@ -562,7 +562,6 @@ def to_md(self) -> str: md_lines.append("") md_lines.append(f"- **URI**: {self.revision_uri}") md_lines.append(f"- **Revision Author**: {self.author_display}") - md_lines.append(f"- **Title**: {self.patch_title}") md_lines.append(f"- **Status**: {self.revision_status}") md_lines.append(f"- **Created**: {self.date_created.strftime(date_format)}") md_lines.append(f"- **Modified**: {self.date_modified.strftime(date_format)}") From b8e1d547409b7cff253262624f2178157c694122 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 9 Feb 2026 18:41:53 +0100 Subject: [PATCH 15/26] Extract stack_graph to local variable to minimize diff --- bugbug/tools/core/platforms/phabricator.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bugbug/tools/core/platforms/phabricator.py b/bugbug/tools/core/platforms/phabricator.py index d685055b76..96fcdb7ad3 100644 --- a/bugbug/tools/core/platforms/phabricator.py +++ b/bugbug/tools/core/platforms/phabricator.py @@ -593,7 +593,8 @@ def to_md(self) -> str: md_lines.append("") md_lines.append("") - if len(self.stack_graph) > 1: + stack_graph = self.stack_graph + if len(stack_graph) > 1: md_lines.append("## Stack Information") md_lines.append("") md_lines.append("**Dependency Graph**:") @@ -603,7 +604,7 @@ def to_md(self) -> str: current_phid = self._revision_metadata["phid"] revision_ids = {} - for phid in self.stack_graph.keys(): + for phid in stack_graph.keys(): if phid == current_phid: revision_ids[phid] = self.revision_id else: @@ -611,7 +612,7 @@ def to_md(self) -> str: revision_phid=phid ).revision_id - for phid, dependencies in self.stack_graph.items(): + for phid, dependencies in stack_graph.items(): from_id = f"D{revision_ids[phid]}" stack_title = self.get_stack_patch_title(phid) From aaf953e7e08a65d0e944c8a8ac2451c9ad01c8b6 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 9 Feb 2026 18:42:19 +0100 Subject: [PATCH 16/26] Use SanitizedPhabricatorPatch in MCP server Modified MCP server to use SanitizedPhabricatorPatch instead of PhabricatorPatch so sanitization only happens at the API boundary. --- mcp/src/bugbug_mcp/server.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mcp/src/bugbug_mcp/server.py b/mcp/src/bugbug_mcp/server.py index f8cb3cb250..6b85048613 100644 --- a/mcp/src/bugbug_mcp/server.py +++ b/mcp/src/bugbug_mcp/server.py @@ -16,7 +16,10 @@ from bugbug.code_search.searchfox_api import FunctionSearchSearchfoxAPI from bugbug.tools.code_review.prompts import SYSTEM_PROMPT_TEMPLATE from bugbug.tools.core.platforms.bugzilla import SanitizedBug -from bugbug.tools.core.platforms.phabricator import PhabricatorPatch +from bugbug.tools.core.platforms.phabricator import ( + PhabricatorPatch, + SanitizedPhabricatorPatch, +) from bugbug.utils import get_secret os.environ["OPENAI_API_KEY"] = get_secret("OPENAI_API_KEY") @@ -117,13 +120,13 @@ def get_bugzilla_bug(bug_id: int) -> str: ) def handle_revision_view_resource(revision_id: int) -> str: """Retrieve a revision from Phabricator alongside its comments.""" - return PhabricatorPatch(revision_id=revision_id).to_md() + return SanitizedPhabricatorPatch(revision_id=revision_id).to_md() @mcp.tool() def get_phabricator_revision(revision_id: int) -> str: """Retrieve a revision from Phabricator alongside its comments.""" - return PhabricatorPatch(revision_id=revision_id).to_md() + return SanitizedPhabricatorPatch(revision_id=revision_id).to_md() llms_txt = FileResource( From 797a7dd45afede2304852050916401cc335c76b1 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 9 Feb 2026 18:42:39 +0100 Subject: [PATCH 17/26] Revert get_patch_by_id to use PhabricatorPatch This method is not used by the MCP server, so sanitization should not be applied here. Sanitization is handled at the MCP server boundary. --- bugbug/tools/core/platforms/phabricator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bugbug/tools/core/platforms/phabricator.py b/bugbug/tools/core/platforms/phabricator.py index 96fcdb7ad3..c64fb2b6fc 100644 --- a/bugbug/tools/core/platforms/phabricator.py +++ b/bugbug/tools/core/platforms/phabricator.py @@ -766,7 +766,7 @@ class PhabricatorReviewData(ReviewData): reraise=True, ) def get_patch_by_id(self, patch_id: str | int) -> Patch: - return SanitizedPhabricatorPatch(patch_id) + return PhabricatorPatch(patch_id) def get_all_inline_comments( self, comment_filter From e554d7bfab5bd9ac7bad1afc3eb3ecb598c425e6 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 9 Feb 2026 18:54:53 +0100 Subject: [PATCH 18/26] Remove _display suffix from property names The _display suffix is a new pattern not in master. Removing it to keep the changes minimal and avoid introducing new naming conventions. --- bugbug/tools/core/platforms/bugzilla.py | 20 +++++------ bugbug/tools/core/platforms/phabricator.py | 42 +++++++++++----------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/bugbug/tools/core/platforms/bugzilla.py b/bugbug/tools/core/platforms/bugzilla.py index 184c0e1305..82780b6a12 100644 --- a/bugbug/tools/core/platforms/bugzilla.py +++ b/bugbug/tools/core/platforms/bugzilla.py @@ -338,11 +338,11 @@ def bug_to_markdown(bug: "Bug") -> str: md_lines.append("## People Involved") - if bug.reporter_display: - md_lines.append(bug.reporter_display) + if bug.reporter: + md_lines.append(bug.reporter) - if bug.assignee_display: - md_lines.append(bug.assignee_display) + if bug.assignee: + md_lines.append(bug.assignee) cc_count = len(bug.cc) if cc_count > 0: @@ -434,7 +434,7 @@ def assignee_detail(self) -> dict: return self._metadata.get("assigned_to_detail", {}) @property - def reporter_display(self) -> str | None: + def reporter(self) -> str | None: detail = self.creator_detail if not detail: return None @@ -445,7 +445,7 @@ def reporter_display(self) -> str | None: return f"- **Reporter**: {name} ({email})" @property - def assignee_display(self) -> str | None: + def assignee(self) -> str | None: detail = self.assignee_detail if not detail: return None @@ -518,20 +518,20 @@ def summary(self) -> str: return super().summary @property - def reporter_display(self) -> str | None: + def reporter(self) -> str | None: if not self.creator_detail: return None if not self._has_trusted_comment: return REDACTED_REPORTER - return super().reporter_display + return super().reporter @property - def assignee_display(self) -> str | None: + def assignee(self) -> str | None: if not self.assignee_detail: return None if not self._has_trusted_comment: return REDACTED_ASSIGNEE - return super().assignee_display + return super().assignee @property def timeline_comments(self) -> list[dict]: diff --git a/bugbug/tools/core/platforms/phabricator.py b/bugbug/tools/core/platforms/phabricator.py index c64fb2b6fc..77b005424f 100644 --- a/bugbug/tools/core/platforms/phabricator.py +++ b/bugbug/tools/core/platforms/phabricator.py @@ -497,21 +497,21 @@ def _format_user_display(self, user_phid: str) -> str: return email @property - def author_display(self) -> str: + def author(self) -> str: return self._format_user_display(self.author_phid) @property - def diff_author_display(self) -> str | None: + def diff_author(self) -> str | None: if self.author_phid == self.diff_author_phid: return None return self._format_user_display(self.diff_author_phid) @property - def summary_display(self) -> str | None: + def summary(self) -> str | None: return self._revision_metadata["fields"].get("summary") or None @property - def test_plan_display(self) -> str | None: + def test_plan(self) -> str | None: return self._revision_metadata["fields"].get("testPlan") or None @property @@ -561,7 +561,7 @@ def to_md(self) -> str: md_lines.append("## Basic Information") md_lines.append("") md_lines.append(f"- **URI**: {self.revision_uri}") - md_lines.append(f"- **Revision Author**: {self.author_display}") + md_lines.append(f"- **Revision Author**: {self.author}") md_lines.append(f"- **Status**: {self.revision_status}") md_lines.append(f"- **Created**: {self.date_created.strftime(date_format)}") md_lines.append(f"- **Modified**: {self.date_modified.strftime(date_format)}") @@ -570,17 +570,17 @@ def to_md(self) -> str: md_lines.append("") md_lines.append("") - if self.summary_display: + if self.summary: md_lines.append("## Summary") md_lines.append("") - md_lines.append(self.summary_display) + md_lines.append(self.summary) md_lines.append("") md_lines.append("") - if self.test_plan_display: + if self.test_plan: md_lines.append("## Test Plan") md_lines.append("") - md_lines.append(self.test_plan_display) + md_lines.append(self.test_plan) md_lines.append("") md_lines.append("") @@ -588,8 +588,8 @@ def to_md(self) -> str: diff = self._diff_metadata md_lines.append(f"- **Diff ID**: {diff['id']}") md_lines.append(f"- **Base Revision**: `{diff['baseRevision']}`") - if self.diff_author_display: - md_lines.append(f"- **Diff Author**: {self.diff_author_display}") + if self.diff_author: + md_lines.append(f"- **Diff Author**: {self.diff_author}") md_lines.append("") md_lines.append("") @@ -652,9 +652,9 @@ def to_md(self) -> str: date_str = date.strftime(date_format) if comment.content_redacted: - comment_author_display = "[Untrusted User]" + comment_author = "[Untrusted User]" else: - comment_author_display = self._format_user_display(comment.author_phid) + comment_author = self._format_user_display(comment.author_phid) if isinstance(comment, PhabricatorInlineComment): line_info = ( @@ -666,12 +666,12 @@ def to_md(self) -> str: generated_status = " [AI-GENERATED]" if comment.is_generated else "" md_lines.append( - f"**{date_str}** - **Inline Comment** by {comment_author_display} on `{comment.filename}` " + f"**{date_str}** - **Inline Comment** by {comment_author} on `{comment.filename}` " f"at {line_info}{done_status}{generated_status}" ) else: md_lines.append( - f"**{date_str}** - **General Comment** by {comment_author_display}" + f"**{date_str}** - **General Comment** by {comment_author}" ) final_comment_content = comment.content @@ -714,21 +714,21 @@ def patch_title(self) -> str: return self._revision_metadata["fields"]["title"] @property - def author_display(self) -> str: + def author(self) -> str: if not self._has_trusted_comment: return REDACTED_AUTHOR - return super().author_display + return super().author @property - def diff_author_display(self) -> str | None: + def diff_author(self) -> str | None: if self.author_phid == self.diff_author_phid: return None if not self._has_trusted_comment: return REDACTED_AUTHOR - return super().diff_author_display + return super().diff_author @property - def summary_display(self) -> str | None: + def summary(self) -> str | None: summary = self._revision_metadata["fields"].get("summary") if not summary: return None @@ -737,7 +737,7 @@ def summary_display(self) -> str | None: return summary @property - def test_plan_display(self) -> str | None: + def test_plan(self) -> str | None: test_plan = self._revision_metadata["fields"].get("testPlan") if not test_plan: return None From 6e3fc17de309f6c9e8009d2a9e170289f22ae1e5 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 9 Feb 2026 19:00:48 +0100 Subject: [PATCH 19/26] Update tests to match simplified trust logic - Remove mozilla-corporation group check (only editbugs matters) - Remove activity date checks (not useful for trust determination) --- tests/test_bugzilla_trusted_filtering.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/tests/test_bugzilla_trusted_filtering.py b/tests/test_bugzilla_trusted_filtering.py index b24c5fe9a3..ddb9e5684b 100644 --- a/tests/test_bugzilla_trusted_filtering.py +++ b/tests/test_bugzilla_trusted_filtering.py @@ -41,7 +41,7 @@ def test_token_set_on_bugzilla_base(): assert Bugzilla.TOKEN == "test_token_12345" assert BugzillaUser.TOKEN == "test_token_12345" - # Mock trusted user response (with mozilla-corporation group) + # Mock trusted user response (with editbugs group) responses.add( responses.GET, "https://bugzilla.mozilla.org/rest/user", @@ -50,9 +50,8 @@ def test_token_set_on_bugzilla_base(): { "name": "trusted@mozilla.com", "groups": [ - {"id": 42, "name": "mozilla-corporation"}, + {"id": 9, "name": "editbugs"}, ], - "last_seen_date": "2024-12-10T00:00:00Z", } ], "faults": [], @@ -448,7 +447,7 @@ def test_trusted_comment_validates_before_untrusted_history_after(): def test_extended_trusted_user_policy(): - """Test extended trusted user policy (editbugs + recent activity).""" + """Test that editbugs users are trusted regardless of activity.""" from unittest.mock import patch from bugbug.tools.core.platforms.bugzilla import _check_users_batch @@ -456,14 +455,12 @@ def test_extended_trusted_user_policy(): mock_users_response = { "users": [ { - "name": "active_editbugs@example.com", + "name": "editbugs_user@example.com", "groups": [{"id": 9, "name": "editbugs"}], - "last_seen_date": "2026-01-01T00:00:00Z", }, { - "name": "inactive_editbugs@example.com", - "groups": [{"id": 9, "name": "editbugs"}], - "last_seen_date": "2020-01-01T00:00:00Z", + "name": "no_editbugs@example.com", + "groups": [], }, ] } @@ -487,11 +484,11 @@ def mock_wait(): mock_instance.wait = mock_wait result = _check_users_batch( - ["active_editbugs@example.com", "inactive_editbugs@example.com"] + ["editbugs_user@example.com", "no_editbugs@example.com"] ) - assert result["active_editbugs@example.com"] is True - assert result["inactive_editbugs@example.com"] is False + assert result["editbugs_user@example.com"] is True + assert result["no_editbugs@example.com"] is False def test_metadata_redacted_without_trusted_comment(): From 6716fd5cabd474c33d4ad9ef2fbc7a6fd406e1e9 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Wed, 11 Feb 2026 15:15:38 +0100 Subject: [PATCH 20/26] Split reporter/assignee into separate name/email properties Move markdown formatting from properties to bug_to_markdown() function. This allows accessing individual fields and keeps presentation logic separate from data access. --- bugbug/tools/core/platforms/bugzilla.py | 58 ++++++++++++++++++------- 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/bugbug/tools/core/platforms/bugzilla.py b/bugbug/tools/core/platforms/bugzilla.py index 82780b6a12..0c62ff2a73 100644 --- a/bugbug/tools/core/platforms/bugzilla.py +++ b/bugbug/tools/core/platforms/bugzilla.py @@ -338,11 +338,11 @@ def bug_to_markdown(bug: "Bug") -> str: md_lines.append("## People Involved") - if bug.reporter: - md_lines.append(bug.reporter) + if bug.reporter_name: + md_lines.append(f"- **Reporter**: {bug.reporter_name} ({bug.reporter_email})") - if bug.assignee: - md_lines.append(bug.assignee) + if bug.assignee_name: + md_lines.append(f"- **Assignee**: {bug.assignee_name} ({bug.assignee_email})") cc_count = len(bug.cc) if cc_count > 0: @@ -434,26 +434,36 @@ def assignee_detail(self) -> dict: return self._metadata.get("assigned_to_detail", {}) @property - def reporter(self) -> str | None: + def reporter_name(self) -> str | None: detail = self.creator_detail if not detail: return None - name = detail.get( + return detail.get( "real_name", detail.get("nick", detail.get("email", "Unknown")) ) - email = detail.get("email", "No email") - return f"- **Reporter**: {name} ({email})" @property - def assignee(self) -> str | None: + def reporter_email(self) -> str | None: + detail = self.creator_detail + if not detail: + return None + return detail.get("email", "No email") + + @property + def assignee_name(self) -> str | None: detail = self.assignee_detail if not detail: return None - name = detail.get( + return detail.get( "real_name", detail.get("nick", detail.get("email", "Unknown")) ) - email = detail.get("email", "No email") - return f"- **Assignee**: {name} ({email})" + + @property + def assignee_email(self) -> str | None: + detail = self.assignee_detail + if not detail: + return None + return detail.get("email", "No email") @property def timeline_comments(self) -> list[dict]: @@ -518,20 +528,36 @@ def summary(self) -> str: return super().summary @property - def reporter(self) -> str | None: + def reporter_name(self) -> str | None: if not self.creator_detail: return None if not self._has_trusted_comment: return REDACTED_REPORTER - return super().reporter + return super().reporter_name @property - def assignee(self) -> str | None: + def reporter_email(self) -> str | None: + if not self.creator_detail: + return None + if not self._has_trusted_comment: + return "No email" + return super().reporter_email + + @property + def assignee_name(self) -> str | None: if not self.assignee_detail: return None if not self._has_trusted_comment: return REDACTED_ASSIGNEE - return super().assignee + return super().assignee_name + + @property + def assignee_email(self) -> str | None: + if not self.assignee_detail: + return None + if not self._has_trusted_comment: + return "No email" + return super().assignee_email @property def timeline_comments(self) -> list[dict]: From 26313a1d01610c85e094f80157c913b4c6baca97 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Wed, 11 Feb 2026 15:16:14 +0100 Subject: [PATCH 21/26] Rename timeline_comments/history to comments/history Simplifies property names - "timeline" prefix is redundant. --- bugbug/tools/core/platforms/bugzilla.py | 10 +++++----- bugbug/tools/core/platforms/phabricator.py | 6 +++--- tests/test_bugzilla_trusted_filtering.py | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/bugbug/tools/core/platforms/bugzilla.py b/bugbug/tools/core/platforms/bugzilla.py index 0c62ff2a73..3fb2619ca7 100644 --- a/bugbug/tools/core/platforms/bugzilla.py +++ b/bugbug/tools/core/platforms/bugzilla.py @@ -370,7 +370,7 @@ def bug_to_markdown(bug: "Bug") -> str: md_lines.append(f"- {rel}") md_lines.append("") - timeline = create_bug_timeline(bug.timeline_comments, bug.timeline_history) + timeline = create_bug_timeline(bug.comments, bug.history) if timeline: md_lines.append("## Bug Timeline") md_lines.append("") @@ -466,11 +466,11 @@ def assignee_email(self) -> str | None: return detail.get("email", "No email") @property - def timeline_comments(self) -> list[dict]: + def comments(self) -> list[dict]: return self._metadata.get("comments", []) @property - def timeline_history(self) -> list[dict]: + def history(self) -> list[dict]: return self._metadata.get("history", []) def to_md(self) -> str: @@ -560,9 +560,9 @@ def assignee_email(self) -> str | None: return super().assignee_email @property - def timeline_comments(self) -> list[dict]: + def comments(self) -> list[dict]: return self._sanitized_timeline[0] @property - def timeline_history(self) -> list[dict]: + def history(self) -> list[dict]: return self._sanitized_timeline[1] diff --git a/bugbug/tools/core/platforms/phabricator.py b/bugbug/tools/core/platforms/phabricator.py index 77b005424f..74659d5cc8 100644 --- a/bugbug/tools/core/platforms/phabricator.py +++ b/bugbug/tools/core/platforms/phabricator.py @@ -515,7 +515,7 @@ def test_plan(self) -> str | None: return self._revision_metadata["fields"].get("testPlan") or None @property - def timeline_comments(self) -> list: + def comments(self) -> list: return sorted(self._all_comments, key=lambda c: c.date_created) def get_stack_patch_title(self, phid: str) -> str: @@ -647,7 +647,7 @@ def to_md(self) -> str: md_lines.append("## Comments Timeline") md_lines.append("") - for comment in self.timeline_comments: + for comment in self.comments: date = datetime.fromtimestamp(comment.date_created) date_str = date.strftime(date_format) @@ -746,7 +746,7 @@ def test_plan(self) -> str | None: return test_plan @cached_property - def timeline_comments(self) -> list: + def comments(self) -> list: sorted_comments = sorted(self._all_comments, key=lambda c: c.date_created) sanitized, _ = _sanitize_comments(sorted_comments, self._users_info) return sanitized diff --git a/tests/test_bugzilla_trusted_filtering.py b/tests/test_bugzilla_trusted_filtering.py index ddb9e5684b..b638c47215 100644 --- a/tests/test_bugzilla_trusted_filtering.py +++ b/tests/test_bugzilla_trusted_filtering.py @@ -674,9 +674,9 @@ def test_admin_tagged_comments_completely_disregarded(): assert not bug._has_trusted_comment # Admin comment should not be in timeline - timeline_comments = bug.timeline_comments - assert len(timeline_comments) == 2 - assert all("Admin comment" not in c["text"] for c in timeline_comments) + comments = bug.comments + assert len(comments) == 2 + assert all("Admin comment" not in c["text"] for c in comments) # Admin comment author should not be in trust cache (wasn't even checked) assert "admin@mozilla.com" not in bug._is_trusted_cache @@ -727,7 +727,7 @@ def test_pre_2022_comments_trusted(): assert bug._has_trusted_comment # Pre-2022 comment shown as-is, post-2022 untrusted filtered - timeline = bug.timeline_comments + timeline = bug.comments assert timeline[0]["text"] == "Pre-2022 comment" assert ( "[Content from untrusted user removed for security]" in timeline[1]["text"] From c79c7667057ba7c0f55a37651142aa6a2484ef0d Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Wed, 11 Feb 2026 16:23:17 +0100 Subject: [PATCH 22/26] Revert get_patch_by_id to use PhabricatorPatch The original stack graph code already uses .patch_title which is sanitized through property overrides, so the refactoring into get_stack_patch_title() was unnecessary. Also remove redundant sanitization comment from docstring. --- bugbug/tools/core/platforms/phabricator.py | 43 ++++++++-------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/bugbug/tools/core/platforms/phabricator.py b/bugbug/tools/core/platforms/phabricator.py index 74659d5cc8..fd12cea1ed 100644 --- a/bugbug/tools/core/platforms/phabricator.py +++ b/bugbug/tools/core/platforms/phabricator.py @@ -518,11 +518,6 @@ def test_plan(self) -> str | None: def comments(self) -> list: return sorted(self._all_comments, key=lambda c: c.date_created) - def get_stack_patch_title(self, phid: str) -> str: - if phid == self._revision_metadata["phid"]: - return self.patch_title - return PhabricatorPatch(revision_phid=phid).patch_title - def _get_transactions(self) -> list[dict]: phabricator = get_phabricator_client() @@ -548,8 +543,6 @@ def to_md(self) -> str: Returns a well-structured markdown document that includes revision metadata, diff information, stack information, code changes, and comments. - - Sanitization is handled by property overrides in SanitizedPhabricatorPatch. """ date_format = "%Y-%m-%d %H:%M:%S" md_lines = [] @@ -603,27 +596,28 @@ def to_md(self) -> str: md_lines.append("graph TD") current_phid = self._revision_metadata["phid"] - revision_ids = {} - for phid in stack_graph.keys(): - if phid == current_phid: - revision_ids[phid] = self.revision_id - else: - revision_ids[phid] = PhabricatorPatch( - revision_phid=phid - ).revision_id + patch_map = { + phid: ( + self + if phid == current_phid + else PhabricatorPatch(revision_phid=phid) + ) + for phid in stack_graph.keys() + } for phid, dependencies in stack_graph.items(): - from_id = f"D{revision_ids[phid]}" - stack_title = self.get_stack_patch_title(phid) - + from_patch = patch_map[phid] + from_id = f"D{from_patch.revision_id}" if phid == current_phid: - md_lines.append(f" {from_id}[{stack_title} - CURRENT]") + md_lines.append( + f" {from_id}[{from_patch.patch_title} - CURRENT]" + ) md_lines.append(f" style {from_id} fill:#105823") else: - md_lines.append(f" {from_id}[{stack_title}]") + md_lines.append(f" {from_id}[{from_patch.patch_title}]") for dep_phid in dependencies: - dep_id = f"D{revision_ids[dep_phid]}" + dep_id = f"D{patch_map[dep_phid].revision_id}" md_lines.append(f" {from_id} --> {dep_id}") md_lines.append("```") @@ -751,13 +745,6 @@ def comments(self) -> list: sanitized, _ = _sanitize_comments(sorted_comments, self._users_info) return sanitized - def get_stack_patch_title(self, phid: str) -> str: - if not self._has_trusted_comment: - return REDACTED_TITLE - if phid == self._revision_metadata["phid"]: - return self.patch_title - return PhabricatorPatch(revision_phid=phid).patch_title - class PhabricatorReviewData(ReviewData): @tenacity.retry( From 4e2002ef2ec756a575347628654c73d43dba6b3e Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Wed, 11 Feb 2026 17:36:05 +0100 Subject: [PATCH 23/26] Redact email addresses in sanitized timeline data --- bugbug/tools/core/platforms/bugzilla.py | 2 ++ tests/test_bugzilla_trusted_filtering.py | 27 ++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/bugbug/tools/core/platforms/bugzilla.py b/bugbug/tools/core/platforms/bugzilla.py index 3fb2619ca7..089d645f79 100644 --- a/bugbug/tools/core/platforms/bugzilla.py +++ b/bugbug/tools/core/platforms/bugzilla.py @@ -196,6 +196,7 @@ def _sanitize_timeline_items( if should_filter: filtered_comments_count += 1 comment_copy = comment.copy() + comment_copy["author"] = "[Redacted]" comment_copy["text"] = "[Content from untrusted user removed for security]" sanitized_comments.append(comment_copy) else: @@ -212,6 +213,7 @@ def _sanitize_timeline_items( if should_filter: filtered_history_count += 1 event_copy = event.copy() + event_copy["who"] = "[Redacted]" sanitized_changes = [] for change in event_copy["changes"]: sanitized_changes.append( diff --git a/tests/test_bugzilla_trusted_filtering.py b/tests/test_bugzilla_trusted_filtering.py index b638c47215..d02a99469b 100644 --- a/tests/test_bugzilla_trusted_filtering.py +++ b/tests/test_bugzilla_trusted_filtering.py @@ -126,8 +126,11 @@ def test_untrusted_before_last_trusted(): assert len(sanitized) == 3 assert sanitized[0]["text"] == "Untrusted comment" + assert sanitized[0]["author"] == "untrusted@example.com" # Before trusted, shown assert sanitized[1]["text"] == "Trusted comment" + assert sanitized[1]["author"] == "trusted@mozilla.com" assert "[Content from untrusted user removed for security]" in sanitized[2]["text"] + assert sanitized[2]["author"] == "[Redacted]" # After trusted, filtered assert filtered_count == 1 @@ -160,7 +163,9 @@ def test_no_trusted_users(): assert len(sanitized) == 2 assert "[Content from untrusted user removed for security]" in sanitized[0]["text"] + assert sanitized[0]["author"] == "[Redacted]" assert "[Content from untrusted user removed for security]" in sanitized[1]["text"] + assert sanitized[1]["author"] == "[Redacted]" assert filtered_count == 2 @@ -182,6 +187,7 @@ def test_fail_closed_scenarios(): assert len(sanitized) == 1 assert "[Content from untrusted user removed for security]" in sanitized[0]["text"] + assert sanitized[0]["author"] == "[Redacted]" assert filtered_count == 1 @@ -227,9 +233,13 @@ def test_timeline_variation_alternating(): assert len(sanitized) == 4 assert sanitized[0]["text"] == "Trusted" + assert sanitized[0]["author"] == "trusted@mozilla.com" assert sanitized[1]["text"] == "Untrusted" + assert sanitized[1]["author"] == "untrusted@example.com" assert sanitized[2]["text"] == "Trusted again" + assert sanitized[2]["author"] == "trusted@mozilla.com" assert "[Content from untrusted user removed for security]" in sanitized[3]["text"] + assert sanitized[3]["author"] == "[Redacted]" assert filtered_count == 1 @@ -259,7 +269,9 @@ def test_timeline_variation_trusted_first(): assert len(sanitized) == 2 assert sanitized[0]["text"] == "Trusted" + assert sanitized[0]["author"] == "trusted@mozilla.com" assert "[Content from untrusted user removed for security]" in sanitized[1]["text"] + assert sanitized[1]["author"] == "[Redacted]" assert filtered_count == 1 @@ -289,7 +301,9 @@ def test_timeline_variation_trusted_last(): assert len(sanitized) == 2 assert sanitized[0]["text"] == "Untrusted" + assert sanitized[0]["author"] == "untrusted@example.com" assert sanitized[1]["text"] == "Trusted" + assert sanitized[1]["author"] == "trusted@mozilla.com" assert filtered_count == 0 @@ -319,7 +333,9 @@ def test_timeline_variation_all_trusted(): assert len(sanitized) == 2 assert sanitized[0]["text"] == "Trusted 1" + assert sanitized[0]["author"] == "trusted1@mozilla.com" assert sanitized[1]["text"] == "Trusted 2" + assert sanitized[1]["author"] == "trusted2@mozilla.com" assert filtered_count == 0 @@ -365,9 +381,13 @@ def test_timeline_variation_multiple_trusted_positions(): assert len(sanitized) == 4 assert sanitized[0]["text"] == "Untrusted 1" + assert sanitized[0]["author"] == "untrusted@example.com" assert sanitized[1]["text"] == "Trusted 1" + assert sanitized[1]["author"] == "trusted@mozilla.com" assert sanitized[2]["text"] == "Untrusted 2" + assert sanitized[2]["author"] == "untrusted@example.com" assert sanitized[3]["text"] == "Trusted 2" + assert sanitized[3]["author"] == "trusted@mozilla.com" assert filtered_count == 0 @@ -402,7 +422,9 @@ def test_trusted_history_does_not_validate(): "[Content from untrusted user removed for security]" in sanitized_comments[0]["text"] ) + assert sanitized_comments[0]["author"] == "[Redacted]" assert filtered_comments == 1 + assert sanitized_history[0]["who"] == "trusted@mozilla.com" assert filtered_history == 0 @@ -439,10 +461,13 @@ def test_trusted_comment_validates_before_untrusted_history_after(): ) assert sanitized_comments[0]["text"] == "Trusted comment" + assert sanitized_comments[0]["author"] == "trusted@mozilla.com" assert filtered_comments == 0 assert len(sanitized_history) == 2 assert sanitized_history[0]["changes"][0]["added"] == "P1" + assert sanitized_history[0]["who"] == "untrusted@example.com" assert sanitized_history[1]["changes"][0]["added"] == "[Filtered]" + assert sanitized_history[1]["who"] == "[Redacted]" assert filtered_history == 1 @@ -729,9 +754,11 @@ def test_pre_2022_comments_trusted(): # Pre-2022 comment shown as-is, post-2022 untrusted filtered timeline = bug.comments assert timeline[0]["text"] == "Pre-2022 comment" + assert timeline[0]["author"] == "old_user@example.com" assert ( "[Content from untrusted user removed for security]" in timeline[1]["text"] ) + assert timeline[1]["author"] == "[Redacted]" def test_collapsed_tags_filtered(): From ce3d4546fb309abb04680cf4c72dfe970c5fa255 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Wed, 11 Feb 2026 18:36:03 +0100 Subject: [PATCH 24/26] Don't trust service accounts --- bugbug/tools/core/platforms/bugzilla.py | 6 +++ tests/test_bugzilla_trusted_filtering.py | 51 ++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/bugbug/tools/core/platforms/bugzilla.py b/bugbug/tools/core/platforms/bugzilla.py index 089d645f79..400329a47a 100644 --- a/bugbug/tools/core/platforms/bugzilla.py +++ b/bugbug/tools/core/platforms/bugzilla.py @@ -78,10 +78,16 @@ def _check_users_batch(emails: list[str]) -> dict[str, bool]: ) def user_handler(user, data): + # In Bugzilla API, "name" is the user's login (email address) email = user.get("name", "").lower() if not email: return + # Service accounts (*.tld) are not trusted even with editbugs + if email.endswith(".tld"): + data[email] = False + return + groups = user.get("groups", []) group_ids = {g.get("id") for g in groups} diff --git a/tests/test_bugzilla_trusted_filtering.py b/tests/test_bugzilla_trusted_filtering.py index d02a99469b..c95e6d78e8 100644 --- a/tests/test_bugzilla_trusted_filtering.py +++ b/tests/test_bugzilla_trusted_filtering.py @@ -793,3 +793,54 @@ def test_collapsed_tags_filtered(): # Only the normal comment should be in sanitized output assert len(sanitized) == 1 assert sanitized[0]["text"] != f"Comment with {tag} tag" + + +def test_service_accounts_not_trusted(): + """Test that service accounts (*.tld) are not trusted even with editbugs.""" + from unittest.mock import patch + + from bugbug.tools.core.platforms.bugzilla import _check_users_batch + + mock_users_response = { + "users": [ + { + "name": "serviceaccount.tld", + "groups": [{"id": 9, "name": "editbugs"}], + }, + { + "name": "real_user@mozilla.com", + "groups": [{"id": 9, "name": "editbugs"}], + }, + { + "name": "another.service.tld", + "groups": [{"id": 9, "name": "editbugs"}], + }, + ] + } + + with ( + patch("libmozdata.bugzilla.BugzillaBase.TOKEN", "test_token"), + patch("libmozdata.bugzilla.BugzillaUser") as mock_user_class, + ): + mock_instance = mock_user_class.return_value + + def mock_wait(): + call_kwargs = mock_user_class.call_args[1] + user_handler = call_kwargs.get("user_handler") + user_data = call_kwargs.get("user_data", {}) + + for user in mock_users_response["users"]: + user_handler(user, user_data) + return mock_instance + + mock_instance.wait = mock_wait + + result = _check_users_batch( + ["serviceaccount.tld", "real_user@mozilla.com", "another.service.tld"] + ) + + # Service accounts should not be trusted even with editbugs + assert result["serviceaccount.tld"] is False + assert result["another.service.tld"] is False + # Real users with editbugs should be trusted + assert result["real_user@mozilla.com"] is True From dda7e0a79c3a6f01fc15a892c9136369abad7755 Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Wed, 11 Feb 2026 19:20:59 -0500 Subject: [PATCH 25/26] Drop assertion on removed method --- tests/test_phabricator_trusted_filtering.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/test_phabricator_trusted_filtering.py b/tests/test_phabricator_trusted_filtering.py index 5a8a5f6dbc..bfc14a8409 100644 --- a/tests/test_phabricator_trusted_filtering.py +++ b/tests/test_phabricator_trusted_filtering.py @@ -1044,9 +1044,5 @@ def test_phabricator_stack_titles_redacted(): ): patch_obj = SanitizedPhabricatorPatch(diff_id=54321) - # When there's no trusted comment, get_stack_patch_title should return redacted - title = patch_obj.get_stack_patch_title("PHID-DREV-current") - assert title == REDACTED_TITLE - - # Also verify patch_title property is redacted + # Verify patch_title property is redacted assert patch_obj.patch_title == REDACTED_TITLE From 455bf9e84da8b6a7c3c05b38535d7f4d7e9a97d6 Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Wed, 11 Feb 2026 19:23:10 -0500 Subject: [PATCH 26/26] Create patch for stack graph with the same type as the parent class It is important to have a sanitized patch title --- bugbug/tools/core/platforms/phabricator.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bugbug/tools/core/platforms/phabricator.py b/bugbug/tools/core/platforms/phabricator.py index fd12cea1ed..f135a0163a 100644 --- a/bugbug/tools/core/platforms/phabricator.py +++ b/bugbug/tools/core/platforms/phabricator.py @@ -598,9 +598,7 @@ def to_md(self) -> str: current_phid = self._revision_metadata["phid"] patch_map = { phid: ( - self - if phid == current_phid - else PhabricatorPatch(revision_phid=phid) + self if phid == current_phid else self.__class__(revision_phid=phid) ) for phid in stack_graph.keys() }