diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index 97c18e6f9..8c3ed83b0 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -33,6 +33,7 @@ from vulnerabilities.pipelines.v2_improvers import flag_ghost_packages as flag_ghost_packages_v2 from vulnerabilities.pipelines.v2_improvers import relate_severities from vulnerabilities.pipelines.v2_improvers import unfurl_version_range as unfurl_version_range_v2 +from vulnerabilities.pipelines.v2_improvers import fetch_patch_url as fetch_patch_url_v2 from vulnerabilities.utils import create_registry IMPROVERS_REGISTRY = create_registry( @@ -71,6 +72,7 @@ compute_version_rank_v2.ComputeVersionRankPipeline, compute_advisory_todo_v2.ComputeToDo, unfurl_version_range_v2.UnfurlVersionRangePipeline, + fetch_patch_url_v2.FetchPatchURLImproverPipeline, compute_advisory_todo.ComputeToDo, collect_ssvc_trees.CollectSSVCPipeline, relate_severities.RelateSeveritiesPipeline, diff --git a/vulnerabilities/pipelines/v2_improvers/fetch_patch_url.py b/vulnerabilities/pipelines/v2_improvers/fetch_patch_url.py new file mode 100644 index 000000000..3da8c4043 --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/fetch_patch_url.py @@ -0,0 +1,83 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
#

from vulnerabilities.models import PackageCommitPatch
from vulnerabilities.models import Patch
from vulnerabilities.pipelines import VulnerableCodePipeline
from vulnerabilities.utils import fetch_response


class FetchPatchURLImproverPipeline(VulnerableCodePipeline):
    """
    Fetch and store the patch text of ``PackageCommitPatch`` and ``Patch``
    records whose ``patch_text`` is still NULL.
    """

    pipeline_id = "fetch_patch_url"
    precedence = 200

    @classmethod
    def steps(cls):
        return (cls.collect_patch_text,)

    def fetch_patch_content(self, url):
        """
        Return the text fetched from ``url`` with NUL characters removed, or
        None when ``url`` is empty or the fetch fails for any reason.
        """
        if not url:
            return None

        self.log(f"Fetching `{url}`")

        # A single unreachable or broken URL must not abort the whole step:
        # this is a best-effort, per-record fetch, so any failure is logged
        # and the record is skipped (it stays NULL and is retried next run).
        try:
            response = fetch_response(url)
        except Exception as e:
            self.log(f"Skipping {url} due to fetch failure: {e!r}")
            return None

        if not response:
            self.log(f"Skipping {url} due to fetch failure.")
            return None

        # NOTE(review): NUL characters are stripped presumably because the
        # database text column cannot store them -- confirm.
        return response.text.replace("\x00", "")

    def advisories_count(self) -> int:
        """
        Return the number of records of both models still missing a patch text.
        """
        return (
            PackageCommitPatch.objects.filter(patch_text__isnull=True).count()
            + Patch.objects.filter(patch_text__isnull=True).count()
        )

    def collect_patch_text(self):
        """
        Fetch and save the patch text of every ``PackageCommitPatch`` and
        ``Patch`` that has none. Records whose URL cannot be built or fetched
        are left untouched.
        """
        for commit_patch in PackageCommitPatch.objects.filter(patch_text__isnull=True):
            patch_url = generate_patch_url(commit_patch.vcs_url, commit_patch.commit_hash)
            self._store_patch_text(commit_patch, patch_url)

        for patch in Patch.objects.filter(patch_text__isnull=True):
            self._store_patch_text(patch, patch.patch_url)

    def _store_patch_text(self, record, url):
        """
        Fetch ``url`` and save its content as ``record.patch_text``.
        Do nothing when no content could be fetched.
        """
        content = self.fetch_patch_content(url)
        if not content:
            return

        record.patch_text = content
        record.save()


def generate_patch_url(vcs_url, commit_hash):
    """
    Return the URL of the raw patch for ``commit_hash`` in the repository at
    ``vcs_url``, or None when either argument is empty or the host is not one
    of github.com, gitlab.com, bitbucket.org or git.kernel.org.

    For example:
    >>> generate_patch_url("https://github.com/nexB/vulnerablecode", "abc1234")
    'https://github.com/nexB/vulnerablecode/commit/abc1234.patch'
    >>> generate_patch_url("https://example.org/repo", "abc1234") is None
    True
    """
    if not vcs_url or not commit_hash:
        return None

    # Normalize to a repository URL without trailing slash or ".git" suffix
    # so that both "…/repo" and "…/repo.git/" produce the same patch URL.
    repo_url = vcs_url.rstrip("/")
    if repo_url.endswith(".git"):
        repo_url = repo_url[: -len(".git")]

    # The trailing slash in each prefix prevents look-alike hosts such as
    # "https://github.com.example.org/" from matching.
    if repo_url.startswith("https://github.com/"):
        return f"{repo_url}/commit/{commit_hash}.patch"

    if repo_url.startswith("https://gitlab.com/"):
        return f"{repo_url}/-/commit/{commit_hash}.patch"

    if repo_url.startswith("https://bitbucket.org/"):
        # Bitbucket uses "/commits/<hash>/raw"; "/-/commit/" is GitLab-only.
        return f"{repo_url}/commits/{commit_hash}/raw"

    if repo_url.startswith("https://git.kernel.org/"):
        # The ".git" suffix was stripped above, so it is added exactly once.
        return f"{repo_url}.git/patch/?id={commit_hash}"

    return None


# ---- vulnerabilities/tests/pipelines/v2_improvers/test_fetch_patch_url.py ----
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

from unittest import mock
from unittest.mock import MagicMock

import pytest

from vulnerabilities.models import PackageCommitPatch
from vulnerabilities.models import Patch
from vulnerabilities.pipelines.v2_improvers.fetch_patch_url import FetchPatchURLImproverPipeline
from vulnerabilities.pipelines.v2_improvers.fetch_patch_url import generate_patch_url


@pytest.mark.django_db
@mock.patch("vulnerabilities.utils.requests.get")
def test_collect_patch_text_success(mock_get):
    """Both a PackageCommitPatch and a Patch get their fetched text stored."""
    # Responses are consumed in call order: the commit patch is fetched first.
    res1 = MagicMock(status_code=200, text="diff --git a/file1")
    res2 = MagicMock(status_code=200, text="diff --git a/file2")
    mock_get.side_effect = [res1, res2]

    pcp = PackageCommitPatch.objects.create(
        vcs_url="https://github.com/nexB/vulnerablecode",
        commit_hash="abc1234",
        patch_text=None,
    )

    patch = Patch.objects.create(
        patch_url="https://gitlab.com/nexB/vulnerablecode/-/commit/def5678.patch",
        patch_text=None,
    )

    pipeline = FetchPatchURLImproverPipeline()
    pipeline.collect_patch_text()

    pcp.refresh_from_db()
    patch.refresh_from_db()

    assert pcp.patch_text == "diff --git a/file1"
    assert patch.patch_text == "diff --git a/file2"


@pytest.mark.django_db
@mock.patch("vulnerabilities.utils.requests.get")
def test_collect_patch_text_failure(mock_get):
    """A failing fetch leaves the stored patch text untouched."""
    mock_get.side_effect = Exception("Connection Error")

    pcp = PackageCommitPatch.objects.create(
        vcs_url="https://github.com/nexB/vulnerablecode",
        commit_hash="abc1234",
        patch_text=None,
    )

    pipeline = FetchPatchURLImproverPipeline()
    pipeline.collect_patch_text()

    # Reload from the database: without this the assertion only checks the
    # in-memory object created above and can never fail.
    pcp.refresh_from_db()
    assert pcp.patch_text is None


@pytest.mark.parametrize(
    "vcs_url, commit_hash, expected",
    [
        (
            "https://github.com/nexB/vulnerablecode",
            "abc1234",
            "https://github.com/nexB/vulnerablecode/commit/abc1234.patch",
        ),
        (
            "https://github.com/nexB/vulnerablecode/",
            "abc1234",
            "https://github.com/nexB/vulnerablecode/commit/abc1234.patch",
        ),
        (
            "https://gitlab.com/nexB/vulnerablecode",
            "def5678",
            "https://gitlab.com/nexB/vulnerablecode/-/commit/def5678.patch",
        ),
        ("https://example.org/nexB/vulnerablecode", "abc1234", None),
        ("", "abc1234", None),
        (None, "abc1234", None),
        ("https://github.com/nexB/vulnerablecode", "", None),
        ("https://github.com/nexB/vulnerablecode", None, None),
    ],
)
def test_generate_patch_url(vcs_url, commit_hash, expected):
    """Patch URLs are built per host; unknown hosts and empty inputs give None."""
    assert generate_patch_url(vcs_url, commit_hash) == expected