From afa3b84f26fc43fa75f2d07801e81476e781a644 Mon Sep 17 00:00:00 2001 From: Namanmeet Singh Date: Tue, 24 Feb 2026 00:42:36 +0530 Subject: [PATCH] Fix #2122: Standardize User-Agent headers across Importers to prevent blocking Signed-off-by: Namanmeet Singh --- aboutcode/federated/__init__.py | 4 ++- vulnerabilities/importers/apache_httpd.py | 11 ++++++-- vulnerabilities/importers/apache_kafka.py | 6 +++- vulnerabilities/importers/apache_tomcat.py | 11 ++++++-- vulnerabilities/importers/debian.py | 6 +++- vulnerabilities/importers/debian_oval.py | 7 +++-- vulnerabilities/importers/gsd.py | 6 +++- vulnerabilities/importers/mattermost.py | 7 +++-- vulnerabilities/importers/openssl.py | 6 +++- vulnerabilities/importers/postgresql.py | 7 +++-- vulnerabilities/importers/suse_backports.py | 12 ++++++-- vulnerabilities/importers/suse_oval.py | 11 ++++++-- vulnerabilities/importers/ubuntu.py | 6 +++- vulnerabilities/importers/ubuntu_usn.py | 7 +++-- .../management/commands/commit_export.py | 7 ++++- .../pipelines/enhance_with_exploitdb.py | 6 +++- vulnerabilities/pipelines/enhance_with_kev.py | 6 +++- .../pipelines/enhance_with_metasploit.py | 6 +++- vulnerabilities/pipelines/nginx_importer.py | 6 +++- vulnerabilities/pipelines/nvd_importer.py | 11 ++++++-- vulnerabilities/pipelines/pysec_importer.py | 6 +++- .../v2_importers/apache_httpd_importer.py | 12 ++++++-- .../v2_importers/apache_kafka_importer.py | 6 +++- .../v2_importers/apache_tomcat_importer.py | 11 ++++++-- .../pipelines/v2_importers/nginx_importer.py | 6 +++- .../pipelines/v2_importers/nvd_importer.py | 11 ++++++-- .../v2_importers/postgresql_importer.py | 11 ++++++-- .../pipelines/v2_importers/pysec_importer.py | 6 +++- .../pipelines/v2_importers/redhat_importer.py | 12 ++++++-- .../v2_improvers/enhance_with_exploitdb.py | 6 +++- .../v2_improvers/enhance_with_kev.py | 6 +++- .../v2_improvers/enhance_with_metasploit.py | 7 ++++- .../pipelines/test_enhance_with_exploitdb.py | 6 ++++ 
.../tests/pipelines/test_enhance_with_kev.py | 6 ++++ .../pipelines/test_enhance_with_metasploit.py | 7 +++++ .../test_apache_httpd_importer_pipeline_v2.py | 8 ++++++ .../test_apache_tomcat_importer_pipeline.py | 11 ++++++++ .../test_postgresql_importer_v2.py | 18 ++++++++++++ .../v2_importers/test_pysec_importer_v2.py | 6 ++++ .../test_enhance_with_exploitdb_v2.py | 6 ++++ .../v2_improvers/test_enhance_with_kev_v2.py | 6 ++++ .../test_enhance_with_metasploit_v2.py | 6 ++++ vulnerabilities/utils.py | 28 +++++++++++++++---- vulnerablecode/settings.py | 4 +++ vulntotal/datasources/deps.py | 7 ++++- vulntotal/datasources/gitlab.py | 16 +++++++++-- vulntotal/datasources/oss_index.py | 7 ++++- vulntotal/datasources/osv.py | 8 +++++- vulntotal/datasources/safetydb.py | 7 ++++- vulntotal/datasources/snyk.py | 7 ++++- vulntotal/datasources/vulnerablecode.py | 19 +++++++++++-- vulntotal/ecosystem/nuget.py | 16 +++++++++-- 52 files changed, 381 insertions(+), 67 deletions(-) diff --git a/aboutcode/federated/__init__.py b/aboutcode/federated/__init__.py index ff3f07e84..799f05c17 100644 --- a/aboutcode/federated/__init__.py +++ b/aboutcode/federated/__init__.py @@ -26,6 +26,8 @@ from packageurl import normalize_subpath from packageurl import normalize_version +from django.conf import settings + __version__ = "0.1.0" """ @@ -559,7 +561,7 @@ def from_url( federation_name=name, config_filename=cls.CONFIG_FILENAME, ) - headers = {"User-Agent": "AboutCode/FederatedCode"} + headers = {"User-Agent": settings.VC_USER_AGENT} response = requests.get(url=rcf_url, headers=headers) if not response.ok: raise Exception(f"Failed to fetch Federation config: {rcf_url}") diff --git a/vulnerabilities/importers/apache_httpd.py b/vulnerabilities/importers/apache_httpd.py index 75099ab8f..7fff6bf01 100644 --- a/vulnerabilities/importers/apache_httpd.py +++ b/vulnerabilities/importers/apache_httpd.py @@ -27,6 +27,7 @@ from vulnerabilities.utils import create_weaknesses_list from 
vulnerabilities.utils import cwe_regex from vulnerabilities.utils import get_item +from django.conf import settings logger = logging.getLogger(__name__) @@ -41,7 +42,10 @@ class ApacheHTTPDImporter(Importer): def advisory_data(self): links = fetch_links(self.base_url) for link in links: - data = requests.get(link).json() + data = requests.get( + link, + headers={'User-Agent': settings.VC_USER_AGENT} + ).json() yield self.to_advisory(data) def to_advisory(self, data): @@ -150,7 +154,10 @@ def to_version_ranges(self, versions_data, fixed_versions): def fetch_links(url): links = [] - data = requests.get(url).content + data = requests.get( + url, + headers={'User-Agent': settings.VC_USER_AGENT} + ).content soup = BeautifulSoup(data, features="lxml") for tag in soup.find_all("a"): link = tag.get("href") diff --git a/vulnerabilities/importers/apache_kafka.py b/vulnerabilities/importers/apache_kafka.py index 27c244b2a..11ca50f07 100644 --- a/vulnerabilities/importers/apache_kafka.py +++ b/vulnerabilities/importers/apache_kafka.py @@ -20,6 +20,7 @@ from vulnerabilities.importer import AffectedPackage from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference +from django.conf import settings logger = logging.getLogger(__name__) @@ -99,7 +100,10 @@ class ApacheKafkaImporter(Importer): @staticmethod def fetch_advisory_page(self): - page = requests.get(self.GH_PAGE_URL) + page = requests.get( + self.GH_PAGE_URL, + headers={'User-Agent': settings.VC_USER_AGENT} + ) return page.content def advisory_data(self): diff --git a/vulnerabilities/importers/apache_tomcat.py b/vulnerabilities/importers/apache_tomcat.py index 9d371ee7d..0db44eb51 100644 --- a/vulnerabilities/importers/apache_tomcat.py +++ b/vulnerabilities/importers/apache_tomcat.py @@ -27,6 +27,7 @@ from vulnerabilities.importer import Reference from vulnerabilities.importer import VulnerabilitySeverity from vulnerabilities.severity_systems import APACHE_TOMCAT +from django.conf import 
settings LOGGER = logging.getLogger(__name__) @@ -126,7 +127,10 @@ def fetch_advisory_pages(self): """ links = self.fetch_advisory_links("https://tomcat.apache.org/security") for page_url in links: - yield page_url, requests.get(page_url).content + yield page_url, requests.get( + page_url, + headers={'User-Agent': settings.VC_USER_AGENT} + ).content def fetch_advisory_links(self, url): """ @@ -134,7 +138,10 @@ def fetch_advisory_links(self, url): Each page link is in the form of `https://tomcat.apache.org/security-10.html`, for instance, for v10. """ - data = requests.get(url).content + data = requests.get( + url, + headers={'User-Agent': settings.VC_USER_AGENT} + ).content soup = BeautifulSoup(data, features="lxml") for tag in soup.find_all("a"): link = tag.get("href") diff --git a/vulnerabilities/importers/debian.py b/vulnerabilities/importers/debian.py index 7d1ae2071..2f2d6fec7 100644 --- a/vulnerabilities/importers/debian.py +++ b/vulnerabilities/importers/debian.py @@ -27,6 +27,7 @@ from vulnerabilities.utils import create_weaknesses_list from vulnerabilities.utils import dedupe from vulnerabilities.utils import get_item +from django.conf import settings logger = logging.getLogger(__name__) @@ -83,7 +84,10 @@ class DebianImporter(Importer): importer_name = "Debian Importer" def get_response(self): - response = requests.get(self.api_url) + response = requests.get( + self.api_url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) if response.status_code == 200: return response.json() raise Exception( diff --git a/vulnerabilities/importers/debian_oval.py b/vulnerabilities/importers/debian_oval.py index f5a747a11..e6359c449 100644 --- a/vulnerabilities/importers/debian_oval.py +++ b/vulnerabilities/importers/debian_oval.py @@ -14,7 +14,7 @@ import requests from vulnerabilities.importer import OvalImporter - +from django.conf import settings class DebianOvalImporter(OvalImporter): @@ -68,7 +68,10 @@ def _fetch(self): for release in releases: file_url = 
f"https://www.debian.org/security/oval/oval-definitions-{release}.xml.bz2" self.data_url = file_url - resp = requests.get(file_url).content + resp = requests.get( + file_url, + headers={'User-Agent': settings.VC_USER_AGENT} + ).content extracted = bz2.decompress(resp) yield ( {"type": "deb", "namespace": "debian", "qualifiers": {"distro": release}}, diff --git a/vulnerabilities/importers/gsd.py b/vulnerabilities/importers/gsd.py index 4d69bd63a..dad54fe14 100644 --- a/vulnerabilities/importers/gsd.py +++ b/vulnerabilities/importers/gsd.py @@ -22,6 +22,7 @@ from vulnerabilities.importer import Reference from vulnerabilities.utils import build_description from vulnerabilities.utils import dedupe +from django.conf import settings logger = logging.getLogger(__name__) @@ -32,7 +33,10 @@ class GSDImporter: # TODO inherit from Importer url = "https://codeload.github.com/cloudsecurityalliance/gsd-database/zip/refs/heads/main" def advisory_data(self) -> Iterable[AdvisoryData]: - response = requests.get(self.url).content + response = requests.get( + self.url, + headers={'User-Agent': settings.VC_USER_AGENT} + ).content with ZipFile(BytesIO(response)) as zip_file: for file_name in zip_file.namelist(): if file_name == "gsd-database-main/allowlist.json" or not file_name.endswith( diff --git a/vulnerabilities/importers/mattermost.py b/vulnerabilities/importers/mattermost.py index a422ea32a..c82b5d8c0 100644 --- a/vulnerabilities/importers/mattermost.py +++ b/vulnerabilities/importers/mattermost.py @@ -25,6 +25,7 @@ from vulnerabilities.importer import Reference from vulnerabilities.importer import VulnerabilitySeverity from vulnerabilities.package_managers import GitHubTagsAPI +from django.conf import settings SECURITY_UPDATES_URL = "https://mattermost.com/security-updates" MM_REPO = { @@ -36,13 +37,13 @@ class MattermostDataSource(Importer): def updated_advisories(self): - # FIXME: Change after this 
https://forum.mattermost.org/t/mattermost-website-returning-403-when-headers-contain-the-word-python/11412 self.set_api() data = requests.get( - SECURITY_UPDATES_URL, headers={"user-agent": "aboutcode/vulnerablecode"} + SECURITY_UPDATES_URL, + headers={"User-Agent": settings.VC_USER_AGENT}, ).content return self.batch_advisories(self.to_advisories(data)) - + def set_api(self): self.version_api = GitHubTagsAPI() asyncio.run( diff --git a/vulnerabilities/importers/openssl.py b/vulnerabilities/importers/openssl.py index b71206418..c3a336028 100644 --- a/vulnerabilities/importers/openssl.py +++ b/vulnerabilities/importers/openssl.py @@ -25,6 +25,7 @@ from vulnerabilities.importer import Reference from vulnerabilities.importer import VulnerabilitySeverity from vulnerabilities.severity_systems import SCORING_SYSTEMS +from django.conf import settings logger = logging.getLogger(__name__) @@ -36,7 +37,10 @@ class OpensslImporter(Importer): importer_name = "OpenSSL Importer" def fetch(self): - response = requests.get(url=self.url) + response = requests.get( + url=self.url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) if not response.status_code == 200: logger.error(f"Error while fetching {self.url}: {response.status_code}") return diff --git a/vulnerabilities/importers/postgresql.py b/vulnerabilities/importers/postgresql.py index 70ab1bfe9..bc5b081b3 100644 --- a/vulnerabilities/importers/postgresql.py +++ b/vulnerabilities/importers/postgresql.py @@ -21,7 +21,7 @@ from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference from vulnerabilities.importer import VulnerabilitySeverity - +from django.conf import settings class PostgreSQLImporter(Importer): @@ -37,7 +37,10 @@ def advisory_data(self): while True: unvisited_urls = known_urls - visited_urls for url in unvisited_urls: - data = requests.get(url).content + data = requests.get( + url, + headers={'User-Agent': settings.VC_USER_AGENT} + ).content data_by_url[url] = data 
visited_urls.add(url) known_urls.update(find_advisory_urls(data)) diff --git a/vulnerabilities/importers/suse_backports.py b/vulnerabilities/importers/suse_backports.py index e7863e7e7..c04138e40 100644 --- a/vulnerabilities/importers/suse_backports.py +++ b/vulnerabilities/importers/suse_backports.py @@ -15,12 +15,15 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import Importer from vulnerabilities.utils import create_etag - +from django.conf import settings class SUSEBackportsImporter(Importer): @staticmethod def get_all_urls_of_backports(url): - r = requests.get(url) + r = requests.get( + url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) soup = BeautifulSoup(r.content, "lxml") for a_tag in soup.find_all("a", href=True): if a_tag["href"].endswith(".yaml") and a_tag["href"].startswith("backports"): @@ -38,7 +41,10 @@ def updated_advisories(self): def _fetch_yaml(self, url): try: - resp = requests.get(url) + resp = requests.get( + url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) resp.raise_for_status() return saneyaml.load(resp.content) diff --git a/vulnerabilities/importers/suse_oval.py b/vulnerabilities/importers/suse_oval.py index 0722682f7..75a79576e 100644 --- a/vulnerabilities/importers/suse_oval.py +++ b/vulnerabilities/importers/suse_oval.py @@ -15,6 +15,7 @@ from bs4 import BeautifulSoup from vulnerabilities.importer import OvalImporter +from django.conf import settings class SuseOvalImporter(OvalImporter): @@ -27,7 +28,10 @@ def __init__(self, *args, **kwargs): self.translations = {"less than": "<", "equals": "=", "greater than or equal": ">="} def _fetch(self): - page = requests.get(self.base_url).text + page = requests.get( + self.base_url, + headers={'User-Agent': settings.VC_USER_AGENT} + ).text soup = BeautifulSoup(page, "lxml") suse_oval_files = [ @@ -37,7 +41,10 @@ def _fetch(self): ] for suse_file in filter(suse_oval_files): - response = requests.get(suse_file) + response = requests.get( + 
suse_file, + headers={'User-Agent': settings.VC_USER_AGENT} + ) extracted = gzip.decompress(response.content) yield ( diff --git a/vulnerabilities/importers/ubuntu.py b/vulnerabilities/importers/ubuntu.py index e47515b93..1e1896cfb 100644 --- a/vulnerabilities/importers/ubuntu.py +++ b/vulnerabilities/importers/ubuntu.py @@ -14,6 +14,7 @@ import requests from vulnerabilities.importer import OvalImporter +from django.conf import settings logger = logging.getLogger(__name__) @@ -77,7 +78,10 @@ def _fetch(self): file_url = f"{base_url}/com.ubuntu.{release}.cve.oval.xml.bz2" # nopep8 self.data_url = file_url logger.info(f"Fetching Ubuntu Oval: {file_url}") - response = requests.get(file_url) + response = requests.get( + file_url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) if response.status_code != requests.codes.ok: logger.error( f"Failed to fetch Ubuntu Oval: HTTP {response.status_code} : {file_url}" diff --git a/vulnerabilities/importers/ubuntu_usn.py b/vulnerabilities/importers/ubuntu_usn.py index 1aa247ec6..532cffa68 100644 --- a/vulnerabilities/importers/ubuntu_usn.py +++ b/vulnerabilities/importers/ubuntu_usn.py @@ -16,7 +16,7 @@ from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference from vulnerabilities.utils import is_cve - +from django.conf import settings class UbuntuUSNImporter(Importer): db_url = "https://usn.ubuntu.com/usn-db/database-all.json.bz2" @@ -97,7 +97,10 @@ def get_usn_reference(usn_id): def fetch(url): - response = requests.get(url).content + response = requests.get( + url, + headers={'User-Agent': settings.VC_USER_AGENT} + ).content raw_data = bz2.decompress(response) return json.loads(raw_data) diff --git a/vulnerabilities/management/commands/commit_export.py b/vulnerabilities/management/commands/commit_export.py index 9d47904f3..63e60370d 100644 --- a/vulnerabilities/management/commands/commit_export.py +++ b/vulnerabilities/management/commands/commit_export.py @@ -23,6 +23,7 @@ from 
vulnerablecode.settings import ALLOWED_HOSTS from vulnerablecode.settings import VULNERABLECODE_VERSION +from django.conf import settings logger = logging.getLogger(__name__) @@ -161,7 +162,11 @@ def create_pull_request(self, repo_url, branch, title, body, token): raise ValueError("Invalid GitHub repo URL") url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/pulls" - headers = {"Authorization": f"token {token}", "Accept": "application/vnd.github.v3+json"} + headers = { + "Authorization": f"token {token}", + "Accept": "application/vnd.github.v3+json", + "User-Agent": settings.VC_USER_AGENT, + } data = {"title": title, "head": branch, "base": "main", "body": body} response = requests.post(url, headers=headers, json=data) diff --git a/vulnerabilities/pipelines/enhance_with_exploitdb.py b/vulnerabilities/pipelines/enhance_with_exploitdb.py index 4d2e966d9..081e276c1 100644 --- a/vulnerabilities/pipelines/enhance_with_exploitdb.py +++ b/vulnerabilities/pipelines/enhance_with_exploitdb.py @@ -23,6 +23,7 @@ from vulnerabilities.models import VulnerabilityRelatedReference from vulnerabilities.pipelines import VulnerableCodePipeline +from django.conf import settings class ExploitDBImproverPipeline(VulnerableCodePipeline): """ @@ -47,7 +48,10 @@ def fetch_exploits(self): self.log(f"Fetching {exploit_db_url}") try: - response = requests.get(exploit_db_url) + response = requests.get( + exploit_db_url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) response.raise_for_status() except requests.exceptions.HTTPError as http_err: self.log( diff --git a/vulnerabilities/pipelines/enhance_with_kev.py b/vulnerabilities/pipelines/enhance_with_kev.py index c9fc21a84..59941e716 100644 --- a/vulnerabilities/pipelines/enhance_with_kev.py +++ b/vulnerabilities/pipelines/enhance_with_kev.py @@ -17,6 +17,7 @@ from vulnerabilities.models import Exploit from vulnerabilities.pipelines import VulnerableCodePipeline +from django.conf import settings class 
VulnerabilityKevPipeline(VulnerableCodePipeline): """ @@ -39,7 +40,10 @@ def fetch_exploits(self): self.log(f"Fetching {kev_url}") try: - response = requests.get(kev_url) + response = requests.get( + kev_url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) response.raise_for_status() except requests.exceptions.HTTPError as http_err: self.log( diff --git a/vulnerabilities/pipelines/enhance_with_metasploit.py b/vulnerabilities/pipelines/enhance_with_metasploit.py index a9b901400..29f58d143 100644 --- a/vulnerabilities/pipelines/enhance_with_metasploit.py +++ b/vulnerabilities/pipelines/enhance_with_metasploit.py @@ -19,6 +19,7 @@ from vulnerabilities.models import Exploit from vulnerabilities.pipelines import VulnerableCodePipeline +from django.conf import settings class MetasploitImproverPipeline(VulnerableCodePipeline): """ @@ -40,7 +41,10 @@ def fetch_exploits(self): url = "https://raw.githubusercontent.com/rapid7/metasploit-framework/master/db/modules_metadata_base.json" self.log(f"Fetching {url}") try: - response = requests.get(url) + response = requests.get( + url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) response.raise_for_status() except requests.exceptions.HTTPError as http_err: self.log( diff --git a/vulnerabilities/pipelines/nginx_importer.py b/vulnerabilities/pipelines/nginx_importer.py index c5e017033..048feb505 100644 --- a/vulnerabilities/pipelines/nginx_importer.py +++ b/vulnerabilities/pipelines/nginx_importer.py @@ -22,6 +22,7 @@ from vulnerabilities.importer import VulnerabilitySeverity from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.severity_systems import GENERIC +from django.conf import settings class NginxImporterPipeline(VulnerableCodeBaseImporterPipeline): @@ -44,7 +45,10 @@ def steps(cls): def fetch(self): self.log(f"Fetch `{self.url}`") - self.advisory_data = requests.get(self.url).text + self.advisory_data = requests.get( + self.url, + headers={'User-Agent': 
settings.VC_USER_AGENT} + ).text def advisories_count(self): return self.advisory_data.count("
  • ") diff --git a/vulnerabilities/pipelines/nvd_importer.py b/vulnerabilities/pipelines/nvd_importer.py index ccb067ec9..eb26829af 100644 --- a/vulnerabilities/pipelines/nvd_importer.py +++ b/vulnerabilities/pipelines/nvd_importer.py @@ -27,6 +27,7 @@ from vulnerabilities.utils import get_cwe_id from vulnerabilities.utils import get_item +from django.conf import settings class NVDImporterPipeline(VulnerableCodeBaseImporterPipeline): """Collect advisories from NVD.""" @@ -81,7 +82,10 @@ def advisories_count(self): advisory_count = 0 try: - response = requests.get(url) + response = requests.get( + url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) response.raise_for_status() data = response.json() except requests.HTTPError as http_err: @@ -103,7 +107,10 @@ def collect_advisories(self) -> Iterable[AdvisoryData]: def fetch(url, logger=None): if logger: logger(f"Fetching `{url}`") - gz_file = requests.get(url) + gz_file = requests.get( + url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) data = gzip.decompress(gz_file.content) return json.loads(data) diff --git a/vulnerabilities/pipelines/pysec_importer.py b/vulnerabilities/pipelines/pysec_importer.py index 32a9fd896..a816fac9b 100644 --- a/vulnerabilities/pipelines/pysec_importer.py +++ b/vulnerabilities/pipelines/pysec_importer.py @@ -17,6 +17,7 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline +from django.conf import settings class PyPIImporterPipeline(VulnerableCodeBaseImporterPipeline): """Collect advisories from PyPI.""" @@ -38,7 +39,10 @@ def steps(cls): def fetch_zip(self): self.log(f"Fetching `{self.url}`") - self.advisory_zip = requests.get(self.url).content + self.advisory_zip = requests.get( + self.url, + headers={'User-Agent': settings.VC_USER_AGENT} + ).content def advisories_count(self) -> int: with ZipFile(BytesIO(self.advisory_zip)) as zip: diff --git 
a/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py b/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py index 51e4b5e77..d31f25315 100644 --- a/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py +++ b/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py @@ -31,12 +31,17 @@ from vulnerabilities.utils import cwe_regex from vulnerabilities.utils import get_item +from django.conf import settings + logger = logging.getLogger(__name__) def fetch_links(url): links = [] - data = requests.get(url).content + data = requests.get( + url, + headers={'User-Agent': settings.VC_USER_AGENT} + ).content soup = BeautifulSoup(data, features="lxml") for tag in soup.find_all("a"): link = tag.get("href") @@ -229,7 +234,10 @@ def collect_advisories(self) -> Iterable[AdvisoryDataV2]: if not self.links: self.links = fetch_links(self.base_url) for link in self.links: - data = requests.get(link).json() + data = requests.get( + link, + headers={'User-Agent': settings.VC_USER_AGENT} + ).json() yield self.to_advisory(data) def advisories_count(self) -> int: diff --git a/vulnerabilities/pipelines/v2_importers/apache_kafka_importer.py b/vulnerabilities/pipelines/v2_importers/apache_kafka_importer.py index d05ed757d..17ff77b24 100644 --- a/vulnerabilities/pipelines/v2_importers/apache_kafka_importer.py +++ b/vulnerabilities/pipelines/v2_importers/apache_kafka_importer.py @@ -28,6 +28,7 @@ from vulnerabilities.pipes.apache_kafka import parse_summary from vulnerabilities.utils import build_description +from django.conf import settings class ApacheKafkaImporterPipeline(VulnerableCodeBaseImporterPipelineV2): """Import Apache Kafka Advisories""" @@ -59,7 +60,10 @@ def steps(cls): def fetch(self): self.log(f"Fetch `{self.url}`") - self.advisory_data = requests.get(self.url).text + self.advisory_data = requests.get( + self.url, + headers={'User-Agent': settings.VC_USER_AGENT} + ).text self.soup = BeautifulSoup(self.advisory_data, features="lxml") def 
advisories_count(self): diff --git a/vulnerabilities/pipelines/v2_importers/apache_tomcat_importer.py b/vulnerabilities/pipelines/v2_importers/apache_tomcat_importer.py index f55665c56..cc8a6ea49 100644 --- a/vulnerabilities/pipelines/v2_importers/apache_tomcat_importer.py +++ b/vulnerabilities/pipelines/v2_importers/apache_tomcat_importer.py @@ -27,6 +27,7 @@ from vulnerabilities.importer import AffectedPackageV2 from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 +from django.conf import settings class ApacheTomcatImporterPipeline(VulnerableCodeBaseImporterPipelineV2): """ @@ -48,7 +49,10 @@ def fetch_advisory_links(self): Each page link is in the form of `https://tomcat.apache.org/security-10.html`, for instance, for v10. """ - data = requests.get(self.base_url).content + data = requests.get( + self.base_url, + headers={'User-Agent': settings.VC_USER_AGENT} + ).content soup = BeautifulSoup(data, features="lxml") for tag in soup.find_all("a"): link = tag.get("href") @@ -67,7 +71,10 @@ def advisories_count(cls): def collect_advisories(self) -> Iterable[AdvisoryDataV2]: for page_url in self.fetch_advisory_links(): try: - content = requests.get(page_url).content + content = requests.get( + page_url, + headers={'User-Agent': settings.VC_USER_AGENT} + ).content tomcat_advisories = parse_tomcat_security(content) self.log(f"Processing {len(tomcat_advisories)} advisories from {page_url}") grouped = defaultdict(list) diff --git a/vulnerabilities/pipelines/v2_importers/nginx_importer.py b/vulnerabilities/pipelines/v2_importers/nginx_importer.py index 81448166b..c45fa8e21 100644 --- a/vulnerabilities/pipelines/v2_importers/nginx_importer.py +++ b/vulnerabilities/pipelines/v2_importers/nginx_importer.py @@ -27,6 +27,7 @@ from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 from vulnerabilities.severity_systems import GENERIC +from django.conf import settings class NginxImporterPipeline(VulnerableCodeBaseImporterPipelineV2): 
"""Collect Nginx security advisories.""" @@ -48,7 +49,10 @@ def steps(cls): def fetch(self): self.log(f"Fetch `{self.url}`") - self.advisory_data = requests.get(self.url).text + self.advisory_data = requests.get( + self.url, + headers={'User-Agent': settings.VC_USER_AGENT}, + ).text def advisories_count(self): return self.advisory_data.count("

  • ") diff --git a/vulnerabilities/pipelines/v2_importers/nvd_importer.py b/vulnerabilities/pipelines/v2_importers/nvd_importer.py index d689aaa05..cbf883a45 100644 --- a/vulnerabilities/pipelines/v2_importers/nvd_importer.py +++ b/vulnerabilities/pipelines/v2_importers/nvd_importer.py @@ -27,6 +27,7 @@ from vulnerabilities.utils import get_cwe_id from vulnerabilities.utils import get_item +from django.conf import settings class NVDImporterPipeline(VulnerableCodeBaseImporterPipelineV2): """ @@ -82,7 +83,10 @@ def advisories_count(self): advisory_count = 0 try: - response = requests.get(url) + response = requests.get( + url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) response.raise_for_status() data = response.json() except requests.HTTPError as http_err: @@ -104,7 +108,10 @@ def collect_advisories(self) -> Iterable[AdvisoryDataV2]: def fetch(url, logger=None): if logger: logger(f"Fetching `{url}`") - gz_file = requests.get(url) + gz_file = requests.get( + url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) data = gzip.decompress(gz_file.content) try: data = data.decode("utf-8") diff --git a/vulnerabilities/pipelines/v2_importers/postgresql_importer.py b/vulnerabilities/pipelines/v2_importers/postgresql_importer.py index 10a6136e7..a9d538703 100644 --- a/vulnerabilities/pipelines/v2_importers/postgresql_importer.py +++ b/vulnerabilities/pipelines/v2_importers/postgresql_importer.py @@ -23,6 +23,7 @@ from vulnerabilities.importer import VulnerabilitySeverity from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 +from django.conf import settings class PostgreSQLImporterPipeline(VulnerableCodeBaseImporterPipelineV2): """ @@ -50,7 +51,10 @@ def advisories_count(self) -> int: def collect_advisories(self) -> Iterable[AdvisoryDataV2]: url = "https://www.postgresql.org/support/security/" - data = requests.get(url).content + data = requests.get( + url, + headers={'User-Agent': settings.VC_USER_AGENT} + ).content yield from 
self.to_advisories(data, url) def collect_links(self): @@ -60,7 +64,10 @@ def collect_links(self): while True: unvisited_urls = known_urls - visited_urls for url in unvisited_urls: - data = requests.get(url).content + data = requests.get( + url, + headers={'User-Agent': settings.VC_USER_AGENT} + ).content visited_urls.add(url) known_urls.update(self.find_advisory_urls(data)) if known_urls == visited_urls: diff --git a/vulnerabilities/pipelines/v2_importers/pysec_importer.py b/vulnerabilities/pipelines/v2_importers/pysec_importer.py index 05614b961..3b2ee60d4 100644 --- a/vulnerabilities/pipelines/v2_importers/pysec_importer.py +++ b/vulnerabilities/pipelines/v2_importers/pysec_importer.py @@ -18,6 +18,7 @@ from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 from vulnerabilities.pipes.osv_v2 import parse_advisory_data_v3 +from django.conf import settings class PyPIImporterPipeline(VulnerableCodeBaseImporterPipelineV2): """ @@ -40,7 +41,10 @@ def steps(cls): def fetch_zip(self): self.log(f"Fetching `{self.url}`") - self.advisory_zip = requests.get(self.url).content + self.advisory_zip = requests.get( + self.url, + headers={'User-Agent': settings.VC_USER_AGENT} + ).content def advisories_count(self) -> int: with ZipFile(BytesIO(self.advisory_zip)) as zip: diff --git a/vulnerabilities/pipelines/v2_importers/redhat_importer.py b/vulnerabilities/pipelines/v2_importers/redhat_importer.py index 5dde4ce8f..5c1fbb599 100644 --- a/vulnerabilities/pipelines/v2_importers/redhat_importer.py +++ b/vulnerabilities/pipelines/v2_importers/redhat_importer.py @@ -33,6 +33,7 @@ from vulnerabilities.utils import load_json from vulntotal import vulntotal_utils +from django.conf import settings class RedHatImporterPipeline(VulnerableCodeBaseImporterPipelineV2): """Import RedHat Advisories (RHSA, RHEA and RHBA) @@ -58,7 +59,10 @@ def steps(cls): def fetch(self): archive_latest_url = urljoin(self.url, "archive_latest.txt") - response = requests.get(archive_latest_url) 
+ response = requests.get( + archive_latest_url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) response.raise_for_status() self.latest_archive_name = response.text.strip() @@ -66,7 +70,11 @@ def fetch(self): archive_path = self.location / self.latest_archive_name archive_url = urljoin(self.url, self.latest_archive_name) - response = requests.get(archive_url, stream=True) + response = requests.get( + archive_url, + headers={'User-Agent': settings.VC_USER_AGENT}, + stream=True + ) response.raise_for_status() with open(archive_path, "wb") as f: diff --git a/vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py b/vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py index c306502d8..7f567ff58 100644 --- a/vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py +++ b/vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py @@ -23,6 +23,7 @@ from vulnerabilities.models import AdvisoryV2 from vulnerabilities.pipelines import VulnerableCodePipeline +from django.conf import settings class ExploitDBImproverPipeline(VulnerableCodePipeline): """ @@ -47,7 +48,10 @@ def fetch_exploits(self): self.log(f"Fetching {exploit_db_url}") try: - response = requests.get(exploit_db_url) + response = requests.get( + exploit_db_url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) response.raise_for_status() except requests.exceptions.HTTPError as http_err: self.log( diff --git a/vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py b/vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py index 486d79232..753b6150c 100644 --- a/vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py +++ b/vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py @@ -18,6 +18,7 @@ from vulnerabilities.models import AdvisoryV2 from vulnerabilities.pipelines import VulnerableCodePipeline +from django.conf import settings class VulnerabilityKevPipeline(VulnerableCodePipeline): """ @@ -40,7 +41,10 @@ def fetch_exploits(self): self.log(f"Fetching 
{kev_url}") try: - response = requests.get(kev_url) + response = requests.get( + kev_url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) response.raise_for_status() except requests.exceptions.HTTPError as http_err: self.log( diff --git a/vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py b/vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py index fbfea5150..a4b5dbc1b 100644 --- a/vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py +++ b/vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py @@ -20,6 +20,8 @@ from vulnerabilities.models import AdvisoryV2 from vulnerabilities.pipelines import VulnerableCodePipeline +from django.conf import settings + class MetasploitImproverPipeline(VulnerableCodePipeline): """ @@ -41,7 +43,10 @@ def fetch_exploits(self): url = "https://raw.githubusercontent.com/rapid7/metasploit-framework/master/db/modules_metadata_base.json" self.log(f"Fetching {url}") try: - response = requests.get(url) + response = requests.get( + url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) response.raise_for_status() except requests.exceptions.HTTPError as http_err: self.log( diff --git a/vulnerabilities/tests/pipelines/test_enhance_with_exploitdb.py b/vulnerabilities/tests/pipelines/test_enhance_with_exploitdb.py index f54dad55d..87ec46970 100644 --- a/vulnerabilities/tests/pipelines/test_enhance_with_exploitdb.py +++ b/vulnerabilities/tests/pipelines/test_enhance_with_exploitdb.py @@ -18,6 +18,8 @@ from vulnerabilities.models import Vulnerability from vulnerabilities.pipelines.enhance_with_exploitdb import ExploitDBImproverPipeline +from django.conf import settings + BASE_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DATA = os.path.join(BASE_DIR, "../test_data", "exploitdb_improver/files_exploits.csv") @@ -45,3 +47,7 @@ def test_exploit_db_improver(mock_get): # Run Exploit-DB Improver again when there are matching aliases. 
improver.execute() assert Exploit.objects.count() == 1 + + args, kwargs = mock_get.call_args + assert "headers" in kwargs, "Headers were not passed!" + assert kwargs["headers"]["User-Agent"] == settings.VC_USER_AGENT diff --git a/vulnerabilities/tests/pipelines/test_enhance_with_kev.py b/vulnerabilities/tests/pipelines/test_enhance_with_kev.py index a93c16555..f34482f48 100644 --- a/vulnerabilities/tests/pipelines/test_enhance_with_kev.py +++ b/vulnerabilities/tests/pipelines/test_enhance_with_kev.py @@ -19,6 +19,8 @@ from vulnerabilities.pipelines.enhance_with_kev import VulnerabilityKevPipeline from vulnerabilities.utils import load_json +from django.conf import settings + BASE_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DATA = os.path.join(BASE_DIR, "../test_data", "kev_data.json") @@ -45,3 +47,7 @@ def test_kev_improver(mock_get): # Run Kev Improver again when there are matching aliases. improver.execute() assert Exploit.objects.count() == 1 + + args, kwargs = mock_get.call_args + assert "headers" in kwargs, "Headers were not passed!" + assert kwargs["headers"]["User-Agent"] == settings.VC_USER_AGENT diff --git a/vulnerabilities/tests/pipelines/test_enhance_with_metasploit.py b/vulnerabilities/tests/pipelines/test_enhance_with_metasploit.py index eea99e0ca..b0c442348 100644 --- a/vulnerabilities/tests/pipelines/test_enhance_with_metasploit.py +++ b/vulnerabilities/tests/pipelines/test_enhance_with_metasploit.py @@ -19,6 +19,8 @@ from vulnerabilities.pipelines.enhance_with_metasploit import MetasploitImproverPipeline from vulnerabilities.utils import load_json +from django.conf import settings + BASE_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DATA = os.path.join(BASE_DIR, "../test_data", "metasploit_improver/modules_metadata_base.json") @@ -42,3 +44,8 @@ def test_metasploit_improver(mock_get): # Run metasploit Improver again when there are matching aliases. 
improver.execute() assert Exploit.objects.count() == 1 + + # Verify that the correct User-Agent header was passed in the request + args, kwargs = mock_get.call_args + assert "headers" in kwargs, "Headers were not passed to requests.get!" + assert kwargs["headers"]["User-Agent"] == settings.VC_USER_AGENT diff --git a/vulnerabilities/tests/pipelines/v2_importers/test_apache_httpd_importer_pipeline_v2.py b/vulnerabilities/tests/pipelines/v2_importers/test_apache_httpd_importer_pipeline_v2.py index 51e91190e..e8865628f 100644 --- a/vulnerabilities/tests/pipelines/v2_importers/test_apache_httpd_importer_pipeline_v2.py +++ b/vulnerabilities/tests/pipelines/v2_importers/test_apache_httpd_importer_pipeline_v2.py @@ -15,6 +15,7 @@ from vulnerabilities.pipelines.v2_importers.apache_httpd_importer import fetch_links from vulnerabilities.pipelines.v2_importers.apache_httpd_importer import get_weaknesses +from django.conf import settings # Dummy responses class DummyResponseContent: @@ -53,6 +54,10 @@ def test_fetch_links_filters_and_resolves(monkeypatch): # Monkeypatch HTTP GET for HTML - def fake_get(url): + def fake_get(url, **kwargs): assert url == base_url + + assert "headers" in kwargs, "Headers were not passed!" + assert kwargs["headers"]["User-Agent"] == settings.VC_USER_AGENT + return DummyResponseContent(html.encode("utf-8")) monkeypatch.setattr(requests, "get", fake_get) @@ -128,6 +133,9 @@ def test_collect_advisories_and_to_advisory(monkeypatch, pipeline): } # Monkeypatch requests.get to return JSON - def fake_get(u): + def fake_get(u, **kwargs): + assert "headers" in kwargs, "Headers were not passed!" 
+ assert kwargs["headers"]["User-Agent"] == settings.VC_USER_AGENT + if u == "u1": return DummyResponseJSON(sample1) elif u == "u2": diff --git a/vulnerabilities/tests/pipelines/v2_importers/test_apache_tomcat_importer_pipeline.py b/vulnerabilities/tests/pipelines/v2_importers/test_apache_tomcat_importer_pipeline.py index 84ab9db0c..7d7680fd3 100644 --- a/vulnerabilities/tests/pipelines/v2_importers/test_apache_tomcat_importer_pipeline.py +++ b/vulnerabilities/tests/pipelines/v2_importers/test_apache_tomcat_importer_pipeline.py @@ -22,6 +22,8 @@ from vulnerabilities.pipelines.v2_importers.apache_tomcat_importer import TomcatAdvisoryData from vulnerabilities.pipelines.v2_importers.apache_tomcat_importer import parse_tomcat_security +from django.conf import settings + TOMCAT_SECURITY_HTML = """ @@ -96,6 +98,11 @@ def test_pipeline_groups_by_cve_per_page(mock_get): assert advisory.summary == "Request smuggling vulnerability" assert len(advisory.affected_packages) == 4 + + # Verify the User-Agent header was passed + args, kwargs = mock_get.call_args + assert "headers" in kwargs, "Headers were not passed to requests.get!" + assert kwargs["headers"]["User-Agent"] == settings.VC_USER_AGENT def test_affected_packages_structure(): @@ -143,3 +150,7 @@ def test_apache_and_maven_version_ranges_created(mock_get): for r in maven_ranges: assert isinstance(r, MavenVersionRange) + + args, kwargs = mock_get.call_args + assert "headers" in kwargs, "Headers were not passed to requests.get!" 
+ assert kwargs["headers"]["User-Agent"] == settings.VC_USER_AGENT diff --git a/vulnerabilities/tests/pipelines/v2_importers/test_postgresql_importer_v2.py b/vulnerabilities/tests/pipelines/v2_importers/test_postgresql_importer_v2.py index 5235a2e47..ce971da8a 100644 --- a/vulnerabilities/tests/pipelines/v2_importers/test_postgresql_importer_v2.py +++ b/vulnerabilities/tests/pipelines/v2_importers/test_postgresql_importer_v2.py @@ -15,6 +15,8 @@ from vulnerabilities.importer import AdvisoryDataV2 from vulnerabilities.pipelines.v2_importers.postgresql_importer import PostgreSQLImporterPipeline +from django.conf import settings + HTML_PAGE_WITH_LINKS = """ @@ -89,6 +91,10 @@ def test_collect_advisories(mock_get, importer): assert str(advisory.affected_packages[0].fixed_version_range) == "vers:generic/10.2.0" assert advisory.affected_packages[0].affected_version_range.contains(SemverVersion("10.0.0")) assert advisory.affected_packages[0].affected_version_range.contains(SemverVersion("10.1.0")) + + args, kwargs = mock_get.call_args + assert "headers" in kwargs, "Headers were not passed!" + assert kwargs["headers"]["User-Agent"] == settings.VC_USER_AGENT @patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get") @@ -102,6 +108,10 @@ def test_collect_advisories_with_no_fixed_version_range(mock_get, importer): assert advisory.affected_packages[0].fixed_version_range is None assert advisory.affected_packages[0].affected_version_range.contains(SemverVersion("9.5")) assert advisory.affected_packages[0].affected_version_range.contains(SemverVersion("9.6")) + + args, kwargs = mock_get.call_args + assert "headers" in kwargs, "Headers were not passed!" 
+ assert kwargs["headers"]["User-Agent"] == settings.VC_USER_AGENT @patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get") @@ -114,6 +124,10 @@ def test_cvss_parsing(mock_get, importer): assert severity.system.identifier == "cvssv3" assert severity.value == "9.8" assert "AV:N/AC:L/PR:N/UI:N" in severity.scoring_elements + + args, kwargs = mock_get.call_args + assert "headers" in kwargs, "Headers were not passed!" + assert kwargs["headers"]["User-Agent"] == settings.VC_USER_AGENT @patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get") @@ -124,3 +138,7 @@ def test_collect_links(mock_get, importer): assert len(importer.links) == 3 assert any("advisory1.html" in link for link in importer.links) assert any("advisory2.html" in link for link in importer.links) + + args, kwargs = mock_get.call_args + assert "headers" in kwargs, "Headers were not passed!" + assert kwargs["headers"]["User-Agent"] == settings.VC_USER_AGENT diff --git a/vulnerabilities/tests/pipelines/v2_importers/test_pysec_importer_v2.py b/vulnerabilities/tests/pipelines/v2_importers/test_pysec_importer_v2.py index 60ec34f88..06205177f 100644 --- a/vulnerabilities/tests/pipelines/v2_importers/test_pysec_importer_v2.py +++ b/vulnerabilities/tests/pipelines/v2_importers/test_pysec_importer_v2.py @@ -10,6 +10,7 @@ PyPIImporterPipeline, # Path to the PyPI Importer ) +from django.conf import settings @pytest.fixture def mock_zip_data(): @@ -51,6 +52,11 @@ def test_fetch_zip(mock_requests_get, mock_zip_data): # Verify that the zip file content is correctly assigned assert pipeline.advisory_zip == mock_zip_data.read() + + # Verify the User-Agent header was passed + args, kwargs = mock_requests_get.call_args + assert "headers" in kwargs, "Headers were not passed to requests.get!" 
+ assert kwargs["headers"]["User-Agent"] == settings.VC_USER_AGENT def test_advisories_count(mock_requests_get, mock_zip_data): diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_exploitdb_v2.py b/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_exploitdb_v2.py index 41f96d706..51a4e81cf 100644 --- a/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_exploitdb_v2.py +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_exploitdb_v2.py @@ -19,6 +19,8 @@ from vulnerabilities.models import AdvisoryV2 from vulnerabilities.pipelines.v2_improvers.enhance_with_exploitdb import ExploitDBImproverPipeline +from django.conf import settings + BASE_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DATA = os.path.join(BASE_DIR, "../../test_data", "exploitdb_improver/files_exploits.csv") @@ -54,3 +56,7 @@ def test_exploit_db_improver(mock_get): # Run Exploit-DB Improver again when there are matching aliases. improver.execute() assert AdvisoryExploit.objects.count() == 1 + + args, kwargs = mock_get.call_args + assert "headers" in kwargs, "Headers were not passed!" + assert kwargs["headers"]["User-Agent"] == settings.VC_USER_AGENT diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_kev_v2.py b/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_kev_v2.py index ab4df9cf2..cb05b1bad 100644 --- a/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_kev_v2.py +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_kev_v2.py @@ -20,6 +20,8 @@ from vulnerabilities.pipelines.v2_improvers.enhance_with_kev import VulnerabilityKevPipeline from vulnerabilities.utils import load_json +from django.conf import settings + BASE_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DATA = os.path.join(BASE_DIR, "../../test_data", "kev_data.json") @@ -55,3 +57,7 @@ def test_kev_improver(mock_get): # Run Kev Improver again when there are matching aliases. 
improver.execute() assert AdvisoryExploit.objects.count() == 1 + + args, kwargs = mock_get.call_args + assert "headers" in kwargs, "Headers were not passed!" + assert kwargs["headers"]["User-Agent"] == settings.VC_USER_AGENT diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_metasploit_v2.py b/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_metasploit_v2.py index 447dea9d3..8ebc53429 100644 --- a/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_metasploit_v2.py +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_metasploit_v2.py @@ -22,6 +22,8 @@ ) from vulnerabilities.utils import load_json +from django.conf import settings + BASE_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DATA = os.path.join( BASE_DIR, "../../test_data", "metasploit_improver/modules_metadata_base.json" @@ -56,3 +58,7 @@ def test_metasploit_improver(mock_get): # Run metasploit Improver again when there are matching aliases. improver.execute() assert AdvisoryExploit.objects.count() == 1 + + args, kwargs = mock_get.call_args + assert "headers" in kwargs, "Headers were not passed!" 
+ assert kwargs["headers"]["User-Agent"] == settings.VC_USER_AGENT diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index 82f29bcea..3bfe5f178 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -41,6 +41,7 @@ from univers.version_range import VersionRange from aboutcode.hashid import build_vcid +from django.conf import settings logger = logging.getLogger(__name__) @@ -75,7 +76,10 @@ def load_toml(path): def fetch_yaml(url): - response = requests.get(url) + response = requests.get( + url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) return saneyaml.load(response.content) @@ -123,6 +127,9 @@ def requests_with_5xx_retry(max_retries=5, backoff_factor=0.5): ) adapter = requests.adapters.HTTPAdapter(max_retries=retries) session = requests.Session() + + session.headers.update({'User-Agent': settings.VC_USER_AGENT}) + session.mount("https://", adapter) session.mount("http://", adapter) return session @@ -284,13 +291,17 @@ def _get_gh_response(gh_token, graphql_query): Convenience function to easy mocking in tests """ endpoint = "https://api.github.com/graphql" - headers = {"Authorization": f"bearer {gh_token}"} + + headers = { + "Authorization": f"bearer {gh_token}", + "User-Agent": settings.VC_USER_AGENT + } + try: return requests.post(endpoint, headers=headers, json=graphql_query).json() except Exception as e: logger.error(f"Failed to fetch data from GitHub GraphQL API: {e}") - def dedupe(original: List) -> List: """ Remove all duplicate items and return a new list preserving ordering @@ -390,7 +401,8 @@ def fetch_response(url): Fetch and return `response` from the `url` """ try: - response = requests.get(url) + response = requests.get(url, headers={'User-Agent': settings.VC_USER_AGENT}) + if response.status_code == HTTPStatus.OK: return response raise Exception( @@ -400,7 +412,6 @@ def fetch_response(url): logger.error(f"Error fetching data from {url!r}: {e}") return None - # This should be a method on PackageURL def 
plain_purl(purl): """ @@ -418,7 +429,12 @@ def plain_purl(purl): def fetch_and_read_from_csv(url): - response = urllib.request.urlopen(url) + req = urllib.request.Request( + url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) + + response = urllib.request.urlopen(req) lines = [l.decode("utf-8") for l in response.readlines()] return csv.reader(lines) diff --git a/vulnerablecode/settings.py b/vulnerablecode/settings.py index ae6638b76..b2902ceed 100644 --- a/vulnerablecode/settings.py +++ b/vulnerablecode/settings.py @@ -399,3 +399,7 @@ FEDERATEDCODE_GIT_SERVICE_TOKEN = env.str("FEDERATEDCODE_GIT_SERVICE_TOKEN", default="") FEDERATEDCODE_GIT_SERVICE_NAME = env.str("FEDERATEDCODE_GIT_SERVICE_NAME", default="") FEDERATEDCODE_GIT_SERVICE_EMAIL = env.str("FEDERATEDCODE_GIT_SERVICE_EMAIL", default="") + +# Standardized User-Agent for all external network requests by Importers + +VC_USER_AGENT = "VulnerableCode/1.0 (+https://github.com/aboutcode-org/vulnerablecode)" \ No newline at end of file diff --git a/vulntotal/datasources/deps.py b/vulntotal/datasources/deps.py index 3188b9302..bbd8e1648 100644 --- a/vulntotal/datasources/deps.py +++ b/vulntotal/datasources/deps.py @@ -17,6 +17,8 @@ from vulntotal.validator import DataSource from vulntotal.validator import VendorData +from django.conf import settings + logger = logging.getLogger(__name__) @@ -25,7 +27,10 @@ class DepsDataSource(DataSource): license_url = "TODO" def fetch_json_response(self, url): - response = requests.get(url) + response = requests.get( + url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) if response.status_code != 200 or response.text == "Not Found": logger.error(f"Error while fetching {url}") return diff --git a/vulntotal/datasources/gitlab.py b/vulntotal/datasources/gitlab.py index dbf84dce7..1f2d44cb2 100644 --- a/vulntotal/datasources/gitlab.py +++ b/vulntotal/datasources/gitlab.py @@ -23,6 +23,8 @@ from vulntotal.validator import VendorData from vulntotal.vulntotal_utils import 
gitlab_constraints_satisfied +from django.conf import settings + logger = logging.getLogger(__name__) @@ -69,14 +71,18 @@ def supported_ecosystem(cls): def fetch_directory_contents(package_slug): url = f"https://gitlab.com/api/v4/projects/12006272/repository/tree?path={package_slug}" - response = requests.get(url) + response = requests.get( + url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) if response.status_code == 200: return response.json() def fetch_yaml(file_path): response = requests.get( - f"https://gitlab.com/gitlab-org/security-products/gemnasium-db/-/raw/master/{file_path}" + f"https://gitlab.com/gitlab-org/security-products/gemnasium-db/-/raw/master/{file_path}", + headers={'User-Agent': settings.VC_USER_AGENT} ) if response.status_code == 200: return response.text @@ -147,7 +153,11 @@ def get_casesensitive_slug(path, package_slug): has_next = True while has_next: - response = requests.post(url, json=payload).json() + response = requests.post( + url, + headers={'User-Agent': settings.VC_USER_AGENT}, + json=payload + ).json() paginated_tree = response[0]["data"]["project"]["repository"]["paginatedTree"] for slug in paginated_tree["nodes"][0]["trees"]["nodes"]: diff --git a/vulntotal/datasources/oss_index.py b/vulntotal/datasources/oss_index.py index a18e65d21..29cf632ed 100644 --- a/vulntotal/datasources/oss_index.py +++ b/vulntotal/datasources/oss_index.py @@ -17,6 +17,8 @@ from vulntotal.validator import DataSource from vulntotal.validator import VendorData +from django.conf import settings + logger = logging.getLogger(__name__) @@ -42,7 +44,10 @@ def fetch_json_response(self, coordinates): if username and token: auth = (username, token) url = self.api_authenticated - response = requests.post(url, auth=auth, json={"coordinates": coordinates}) + response = requests.post(url, + headers={'User-Agent': settings.VC_USER_AGENT}, + auth=auth, + json={"coordinates": coordinates}) try: response.raise_for_status() return response.json() diff --git 
a/vulntotal/datasources/osv.py b/vulntotal/datasources/osv.py index 4adf6322c..cd91f641f 100644 --- a/vulntotal/datasources/osv.py +++ b/vulntotal/datasources/osv.py @@ -18,6 +18,8 @@ from vulntotal.validator import VendorData from vulntotal.vulntotal_utils import get_item +from django.conf import settings + logger = logging.getLogger(__name__) @@ -37,7 +39,11 @@ def fetch_advisory(self, payload): A JSON object containing the advisory information for the package, or None if an error occurs while fetching data from the OSV API. """ - response = requests.post(self.url, data=str(payload)) + response = requests.post( + self.url, + headers={'User-Agent': settings.VC_USER_AGENT}, + data=str(payload) + ) try: response.raise_for_status() except requests.exceptions.HTTPError as e: diff --git a/vulntotal/datasources/safetydb.py b/vulntotal/datasources/safetydb.py index 9ccad98b3..aae5b32c6 100644 --- a/vulntotal/datasources/safetydb.py +++ b/vulntotal/datasources/safetydb.py @@ -18,6 +18,8 @@ from vulntotal.validator import InvalidCVEError from vulntotal.validator import VendorData +from django.conf import settings + logger = logging.getLogger(__name__) @@ -36,7 +38,10 @@ def fetch_advisory(self): A JSON object containing the advisory information for insecure packages, or None if an error occurs while fetching data from safetydb repo's URL. 
""" - response = requests.get(self.url) + response = requests.get( + self.url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) try: response.raise_for_status() except requests.exceptions.HTTPError as e: diff --git a/vulntotal/datasources/snyk.py b/vulntotal/datasources/snyk.py index 3aabfb416..8f6ac50f8 100644 --- a/vulntotal/datasources/snyk.py +++ b/vulntotal/datasources/snyk.py @@ -22,6 +22,8 @@ from vulntotal.validator import VendorData from vulntotal.vulntotal_utils import snyk_constraints_satisfied +from django.conf import settings + logger = logging.getLogger(__name__) fixed_version_pattern = re.compile(r"\b\d[\w.-]*\b") @@ -42,7 +44,10 @@ def fetch(self, url): A string of HTML or a dictionary of JSON if the response is successful, or None if the response is unsuccessful. """ - response = requests.get(url) + response = requests.get( + url, + headers={'User-Agent': settings.VC_USER_AGENT} + ) try: response.raise_for_status() except requests.exceptions.HTTPError as e: diff --git a/vulntotal/datasources/vulnerablecode.py b/vulntotal/datasources/vulnerablecode.py index d0122db83..2e87b198c 100644 --- a/vulntotal/datasources/vulnerablecode.py +++ b/vulntotal/datasources/vulnerablecode.py @@ -19,6 +19,8 @@ from vulntotal.validator import DataSource from vulntotal.validator import VendorData +from django.conf import settings + logger = logging.getLogger(__name__) @@ -139,9 +141,22 @@ def fetch_vulnerablecode_query(url: str, payload: dict): raise VCIOTokenError(msg) response = ( - requests.post(url, headers={"Authorization": f"Token {vcio_token}"}, json=payload) + requests.post( + url, + headers={ + "Authorization": f"Token {vcio_token}", + "User-Agent": settings.VC_USER_AGENT + }, + json=payload + ) if payload is not None - else requests.get(url, headers={"Authorization": f"Token {vcio_token}"}) + else requests.get( + url, + headers={ + "Authorization": f"Token {vcio_token}", + "User-Agent": settings.VC_USER_AGENT + } + ) ) if 
response.text.startswith('{"detail":'): diff --git a/vulntotal/ecosystem/nuget.py b/vulntotal/ecosystem/nuget.py index eac2d629a..78b4afdc0 100644 --- a/vulntotal/ecosystem/nuget.py +++ b/vulntotal/ecosystem/nuget.py @@ -8,6 +8,7 @@ # from urllib.parse import urljoin +from django.conf import settings import requests @@ -25,7 +26,10 @@ def get_closest_nuget_package_name(query): url_nuget_service = "https://api.nuget.org/v3/index.json" url_nuget_search = "" - api_resources = requests.get(url_nuget_service).json() + api_resources = requests.get( + url_nuget_service, + headers={'User-Agent': settings.VC_USER_AGENT} + ).json() for resource in api_resources.get("resources") or []: if resource.get("@type") == "SearchQueryService": url_nuget_search = resource["@id"] @@ -33,7 +37,10 @@ def get_closest_nuget_package_name(query): if url_nuget_search: url_query = urljoin(url_nuget_search, f"?q={query}") - query_response = requests.get(url_query).json() + query_response = requests.get( + url_query, + headers={'User-Agent': settings.VC_USER_AGENT} + ).json() if query_response.get("data"): return query_response["data"][0]["id"] @@ -55,7 +62,10 @@ def search_closest_nuget_package_name(query): ... """ url_query = f"https://azuresearch-usnc.nuget.org/autocomplete?q={query}" - query_response = requests.get(url_query).json() + query_response = requests.get( + url_query, + headers={'User-Agent': settings.VC_USER_AGENT} + ).json() data = query_response.get("data") if data: return data[0]