From a0ff18d8624958656a03ad3eb1fd43f9adc47533 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Mon, 19 Jan 2026 22:54:41 +0900
Subject: [PATCH 01/23] exp(pypi): store necessary facts fetched from SimpleAPI
This allows us to cache what we store from the SimpleAPI in between the
runs.
---
python/private/pypi/extension.bzl | 36 +-
python/private/pypi/hub_builder.bzl | 10 +-
python/private/pypi/parse_requirements.bzl | 19 +-
python/private/pypi/parse_simpleapi_html.bzl | 77 ++--
python/private/pypi/simpleapi_download.bzl | 350 +++++++++++++++----
5 files changed, 396 insertions(+), 96 deletions(-)
diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl
index 3927f61c00..5b2b21829a 100644
--- a/python/private/pypi/extension.bzl
+++ b/python/private/pypi/extension.bzl
@@ -223,6 +223,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
# versions.
pip_hub_map = {}
simpleapi_cache = {}
+ facts = {}
for mod in module_ctx.modules:
for pip_attr in mod.tags.parse:
@@ -240,6 +241,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
evaluate_markers_fn = kwargs.get("evaluate_markers", None),
available_interpreters = kwargs.get("available_interpreters", INTERPRETER_LABELS),
logger = repo_utils.logger(module_ctx, "pypi:hub:" + hub_name),
+ facts = facts,
)
pip_hub_map[pip_attr.hub_name] = builder
elif pip_hub_map[hub_name].module_name != mod.name:
@@ -286,6 +288,25 @@ You cannot use both the additive_build_content and additive_build_content_file a
hub_group_map[hub.name] = out.group_map
hub_whl_map[hub.name] = out.whl_map
+ facts = {
+ "fact_version": facts.get("fact_version"),
+ } | {
+ index_url: {
+ k: _sorted_dict(f.get(k))
+ for k in [
+ "dist_filenames",
+ "dist_hashes",
+ "dist_yanked",
+ ]
+ if f.get(k)
+ }
+ for index_url, f in facts.items()
+ if index_url not in ["fact_version"]
+ }
+ if len(facts) == 1:
+ # only version is present, skip writing
+ facts = None
+
return struct(
config = config,
exposed_packages = exposed_packages,
@@ -294,6 +315,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
hub_whl_map = hub_whl_map,
whl_libraries = whl_libraries,
whl_mods = whl_mods,
+ facts = facts,
platform_config_settings = {
hub_name: {
platform_name: sorted([str(Label(cv)) for cv in p.config_settings])
@@ -303,6 +325,12 @@ You cannot use both the additive_build_content and additive_build_content_file a
},
)
+def _sorted_dict(d):
+ if not d:
+ return {}
+
+ return {k: v for k, v in sorted(d.items())}
+
def _pip_impl(module_ctx):
"""Implementation of a class tag that creates the pip hub and corresponding pip spoke whl repositories.
@@ -391,9 +419,11 @@ def _pip_impl(module_ctx):
groups = mods.hub_group_map.get(hub_name),
)
- return module_ctx.extension_metadata(
- reproducible = True,
- )
+ kwargs = {"reproducible": True}
+ if mods.facts:
+ kwargs["facts"] = mods.facts
+
+ return module_ctx.extension_metadata(**kwargs)
_default_attrs = {
"arch_name": attr.string(
diff --git a/python/private/pypi/hub_builder.bzl b/python/private/pypi/hub_builder.bzl
index f54d02d8b0..2a75f0112e 100644
--- a/python/private/pypi/hub_builder.bzl
+++ b/python/private/pypi/hub_builder.bzl
@@ -31,6 +31,7 @@ def hub_builder(
simpleapi_download_fn,
evaluate_markers_fn,
logger,
+ facts = None,
simpleapi_cache = {}):
"""Return a hub builder instance
@@ -47,6 +48,7 @@ def hub_builder(
used during the `repository_rule` and must be always compatible with the host.
simpleapi_download_fn: the function used to download from SimpleAPI.
simpleapi_cache: the cache for the download results.
+ facts: the facts if they are available.
logger: the logger for this builder.
"""
@@ -69,6 +71,7 @@ def hub_builder(
_platforms = {},
_group_name_by_whl = {},
_get_index_urls = {},
+ _facts = facts,
_use_downloader = {},
_simpleapi_cache = simpleapi_cache,
# instance constants
@@ -335,11 +338,16 @@ def _set_get_index_urls(self, pip_attr):
d
for d in distributions
if _use_downloader(self, python_version, d)
- ],
+ ] if type(distributions) == "list" else {
+ d: versions
+ for d, versions in distributions.items()
+ if _use_downloader(self, python_version, d)
+ },
envsubst = pip_attr.envsubst,
# Auth related info
netrc = pip_attr.netrc,
auth_patterns = pip_attr.auth_patterns,
+ facts = self._facts,
),
cache = self._simpleapi_cache,
parallel_download = pip_attr.parallel_download,
diff --git a/python/private/pypi/parse_requirements.bzl b/python/private/pypi/parse_requirements.bzl
index 5c05c753fd..f9a6e672bf 100644
--- a/python/private/pypi/parse_requirements.bzl
+++ b/python/private/pypi/parse_requirements.bzl
@@ -170,16 +170,15 @@ def parse_requirements(
index_urls = {}
if get_index_urls:
- index_urls = get_index_urls(
- ctx,
- # Use list({}) as a way to have a set
- list({
- req.distribution: None
- for reqs in requirements_by_platform.values()
- for req in reqs.values()
- if not req.srcs.url
- }),
- )
+ distributions = {}
+ for reqs in requirements_by_platform.values():
+ for req in reqs.values():
+ if req.srcs.url:
+ continue
+
+ distributions.setdefault(req.distribution, []).append(req.srcs.version)
+
+ index_urls = get_index_urls(ctx, distributions)
ret = []
for name, reqs in sorted(requirements_by_platform.items()):
diff --git a/python/private/pypi/parse_simpleapi_html.bzl b/python/private/pypi/parse_simpleapi_html.bzl
index a41f0750c4..da11a77635 100644
--- a/python/private/pypi/parse_simpleapi_html.bzl
+++ b/python/private/pypi/parse_simpleapi_html.bzl
@@ -16,12 +16,14 @@
Parse SimpleAPI HTML in Starlark.
"""
-def parse_simpleapi_html(*, url, content):
+def parse_simpleapi_html(*, url, content, distribution = None, return_absolute = True):
"""Get the package URLs for given shas by parsing the Simple API HTML.
Args:
url(str): The URL that the HTML content can be downloaded from.
+ distribution(str): TODO
content(str): The Simple API HTML content.
+ return_absolute: {type}`bool` TODO
Returns:
A list of structs with:
@@ -33,6 +35,9 @@ def parse_simpleapi_html(*, url, content):
present, then the 'metadata_url' is also present. Defaults to "".
* metadata_url: The URL for the METADATA if we can download it. Defaults to "".
"""
+ if not distribution:
+ _, _, distribution = url.strip("/").rpartition("/")
+
sdists = {}
whls = {}
lines = content.split("")
maybe_metadata, _, filename = head.rpartition(">")
- version = _version(filename)
+ version = pkg_version(filename, distribution)
sha256s_by_version.setdefault(version, []).append(sha256)
metadata_sha256 = ""
@@ -79,13 +85,17 @@ def parse_simpleapi_html(*, url, content):
break
if filename.endswith(".whl"):
+ metadata_url = metadata_url or ""
+ if return_absolute and metadata_url:
+ metadata_url = absolute_url(index_url = url, url = metadata_url)
+
whls[sha256] = struct(
filename = filename,
version = version,
url = dist_url,
sha256 = sha256,
metadata_sha256 = metadata_sha256,
- metadata_url = _absolute_url(url, metadata_url) if metadata_url else "",
+ metadata_url = metadata_url,
yanked = yanked,
)
else:
@@ -110,18 +120,36 @@ _SDIST_EXTS = [
".zip",
]
-def _version(filename):
+def pkg_version(filename, distribution = None):
+ """pkg_version extracts the version from the filename.
+
+ TODO: move this to a different location
+
+ Args:
+ filename: TODO
+ distribution: TODO
+
+ Returns:
+ version string
+ """
# See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#binary-distribution-format
- _, _, tail = filename.partition("-")
- version, _, _ = tail.partition("-")
- if version != tail:
- # The format is {name}-{version}-{whl_specifiers}.whl
- return version
+ if filename.endswith(".whl"):
+ _, _, tail = filename.partition("-")
+ version, _, _ = tail.partition("-")
+ if version != tail:
+ # The format is {name}-{version}-{whl_specifiers}.whl
+ return version
+
+ if not distribution:
+ fail("for parsing sdists passing 'distribution' is mandatory")
# NOTE @aignas 2025-03-29: most of the files are wheels, so this is not the common path
# {name}-{version}.{ext}
+ # TODO @aignas 2026-01-20: test for handling dashes in names, can't think of any other way to
+ # get the version from the filename but to pass in the distribution name to this function.
+ version = filename[len(distribution) + 1:]
for ext in _SDIST_EXTS:
version, _, _ = version.partition(ext) # build or name
@@ -147,21 +175,30 @@ def _is_downloadable(url):
"""
return url.startswith("http://") or url.startswith("https://") or url.startswith("file://")
-def _absolute_url(index_url, candidate):
- if candidate == "":
- return candidate
+def absolute_url(*, index_url, url):
+ """Return an absolute URL in case the url is not absolute.
+
+ Args:
+ index_url: {type}`str` The index_url.
+ url: {type}`str` The url of the artifact.
+
+ Returns:
+ `url` if it is absolute, or absolute URL based on the `index_url`.
+ """
+ if url == "":
+ return url
- if _is_downloadable(candidate):
- return candidate
+ if _is_downloadable(url):
+ return url
- if candidate.startswith("/"):
+ if url.startswith("/"):
# absolute path
root_directory = _get_root_directory(index_url)
- return "{}{}".format(root_directory, candidate)
+ return "{}{}".format(root_directory, url)
- if candidate.startswith(".."):
+ if url.startswith(".."):
# relative path with up references
- candidate_parts = candidate.split("..")
+ candidate_parts = url.split("..")
last = candidate_parts[-1]
for _ in range(len(candidate_parts) - 1):
index_url, _, _ = index_url.rstrip("/").rpartition("/")
@@ -169,4 +206,4 @@ def _absolute_url(index_url, candidate):
return "{}/{}".format(index_url, last.strip("/"))
# relative path without up-references
- return "{}/{}".format(index_url.rstrip("/"), candidate)
+ return "{}/{}".format(index_url.rstrip("/"), url)
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index 52ff02a178..f7aea1a7de 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -21,7 +21,9 @@ load("//python/private:auth.bzl", _get_auth = "get_auth")
load("//python/private:envsubst.bzl", "envsubst")
load("//python/private:normalize_name.bzl", "normalize_name")
load("//python/private:text_util.bzl", "render")
-load(":parse_simpleapi_html.bzl", "parse_simpleapi_html")
+load(":parse_simpleapi_html.bzl", "absolute_url", "parse_simpleapi_html", "pkg_version")
+
+_FACT_VERSION = "v1"
def simpleapi_download(
ctx,
@@ -29,6 +31,7 @@ def simpleapi_download(
attr,
cache,
parallel_download = True,
+ store_facts = True,
read_simpleapi = None,
get_auth = None,
_fail = fail):
@@ -43,12 +46,13 @@ def simpleapi_download(
separate packages.
* extra_index_urls: Extra index URLs that will be looked up after
the main is looked up.
- * sources: list[str], the sources to download things for. Each value is
- the contents of requirements files.
+ * sources: list[str] | dict[str, list[str]], the sources to download things for. Each
+ value is the contents of requirements files.
* envsubst: list[str], the envsubst vars for performing substitution in index url.
* netrc: The netrc parameter for ctx.download, see http_file for docs.
* auth_patterns: The auth_patterns parameter for ctx.download, see
http_file for docs.
+ * facts: The facts to write to if we support them.
cache: A dictionary that can be used as a cache between calls during a
single evaluation of the extension. We use a dictionary as a cache
so that we can reuse calls to the simple API when evaluating the
@@ -58,6 +62,9 @@ def simpleapi_download(
reflected when re-evaluating the extension unless we do
`bazel clean --expunge`.
parallel_download: A boolean to enable usage of bazel 7.1 non-blocking downloads.
+ store_facts: A boolean to enable usage of bazel 8.5 facts feature to store data in
+ MODULE.bazel.lock in between the runs. Can be disabled if urls contain sensitive
+ data and users would rather opt-out of the feature.
read_simpleapi: a function for reading and parsing of the SimpleAPI contents.
Used in tests.
get_auth: A function to get auth information passed to read_simpleapi. Used in tests.
@@ -80,28 +87,36 @@ def simpleapi_download(
contents = {}
index_urls = [attr.index_url] + attr.extra_index_urls
read_simpleapi = read_simpleapi or _read_simpleapi
+ if hasattr(ctx, "facts") and store_facts:
+ download_kwargs["facts"] = _facts(ctx, attr.facts)
+ read_simpleapi = _read_simpleapi_with_facts
+ ctx.report_progress("Fetch package lists from PyPI index or read from MODULE.bazel.lock")
+ else:
+ ctx.report_progress("Fetch package lists from PyPI index")
found_on_index = {}
warn_overrides = False
- ctx.report_progress("Fetch package lists from PyPI index")
+
+ # Normalize the inputs
+ input_sources = {pkg: [] for pkg in attr.sources} if type(attr.sources) == "list" else attr.sources
+
for i, index_url in enumerate(index_urls):
if i != 0:
# Warn the user about a potential fix for the overrides
warn_overrides = True
async_downloads = {}
- sources = [pkg for pkg in attr.sources if pkg not in found_on_index]
- for pkg in sources:
+ sources = {pkg: versions for pkg, versions in input_sources.items() if pkg not in found_on_index}
+ for pkg, versions in sources.items():
pkg_normalized = normalize_name(pkg)
result = read_simpleapi(
ctx = ctx,
- url = "{}/{}/".format(
- index_url_overrides.get(pkg_normalized, index_url).rstrip("/"),
- pkg,
- ),
attr = attr,
cache = cache,
+ index_url = index_url_overrides.get(pkg_normalized, index_url),
+ distribution = pkg,
get_auth = get_auth,
+ requested_versions = versions,
**download_kwargs
)
if hasattr(result, "wait"):
@@ -109,6 +124,7 @@ def simpleapi_download(
async_downloads[pkg] = struct(
pkg_normalized = pkg_normalized,
wait = result.wait,
+ fns = result.fns,
)
elif result.success:
contents[pkg_normalized] = result.output
@@ -164,49 +180,14 @@ If you would like to skip downloading metadata for these packages please add 'si
return contents
-def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs):
- """Read SimpleAPI.
-
- Args:
- ctx: The module_ctx or repository_ctx.
- url: str, the url parameter that can be passed to ctx.download.
- attr: The attribute that contains necessary info for downloading. The
- following attributes must be present:
- * envsubst: The envsubst values for performing substitutions in the URL.
- * netrc: The netrc parameter for ctx.download, see http_file for docs.
- * auth_patterns: The auth_patterns parameter for ctx.download, see
- http_file for docs.
- cache: A dict for storing the results.
- get_auth: A function to get auth information. Used in tests.
- **download_kwargs: Any extra params to ctx.download.
- Note that output and auth will be passed for you.
-
- Returns:
- A similar object to what `download` would return except that in result.out
- will be the parsed simple api contents.
- """
- # NOTE @aignas 2024-03-31: some of the simple APIs use relative URLs for
- # the whl location and we cannot handle multiple URLs at once by passing
- # them to ctx.download if we want to correctly handle the relative URLs.
- # TODO: Add a test that env subbed index urls do not leak into the lock file.
-
- real_url = strip_empty_path_segments(envsubst(
- url,
- attr.envsubst,
- ctx.getenv if hasattr(ctx, "getenv") else ctx.os.environ.get,
- ))
-
- cache_key = real_url
- if cache_key in cache:
- return struct(success = True, output = cache[cache_key])
-
+def _download_simpleapi(*, ctx, url, real_url, attr_envsubst, get_auth, **kwargs):
output_str = envsubst(
url,
- attr.envsubst,
+ attr_envsubst,
# Use env names in the subst values - this will be unique over
# the lifetime of the execution of this function and we also use
# `~` as the separator to ensure that we don't get clashes.
- {e: "~{}~".format(e) for e in attr.envsubst}.get,
+ {e: "~{}~".format(e) for e in attr_envsubst}.get,
)
# Transform the URL into a valid filename
@@ -217,22 +198,50 @@ def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs):
get_auth = get_auth or _get_auth
- # NOTE: this may have block = True or block = False in the download_kwargs
+ # NOTE: this may have block = True or block = False in the kwargs
download = ctx.download(
url = [real_url],
output = output,
auth = get_auth(ctx, [real_url], ctx_attr = attr),
allow_fail = True,
- **download_kwargs
+ **kwargs
)
- if download_kwargs.get("block") == False:
- # Simulate the same API as ctx.download has
+ return _await(
+ download,
+ _read,
+ ctx = ctx,
+ output = output,
+ )
+
+def _await(download, fn, **kwargs):
+ if hasattr(download, "fns"):
+ download.fns.append(
+ lambda result: fn(result = result, **kwargs),
+ )
+ return download
+ elif hasattr(download, "wait"):
+ # Have a reference type which we can iterate later when aggregating the result
+ fns = [lambda result: fn(result = result, **kwargs)]
+
+ def wait():
+ result = download.wait()
+ for fn in fns:
+ result = fn(result = result)
+ return result
+
return struct(
- wait = lambda: _read_index_result(ctx, download.wait(), output, real_url, cache, cache_key),
+ wait = wait,
+ fns = fns,
)
- return _read_index_result(ctx, download, output, real_url, cache, cache_key)
+ return fn(result = download, **kwargs)
+
+def _read(ctx, result, output):
+ if not result.success:
+ return result
+
+ return struct(success = True, output = ctx.read(output))
def strip_empty_path_segments(url):
"""Removes empty path segments from a URL. Does nothing for urls with no scheme.
@@ -255,15 +264,232 @@ def strip_empty_path_segments(url):
else:
return "{}://{}".format(scheme, stripped)
-def _read_index_result(ctx, result, output, url, cache, cache_key):
- if not result.success:
+def _read_simpleapi(ctx, index_url, pkg, attr, cache, get_auth = None, return_absolute = True, **download_kwargs):
+ """Read SimpleAPI.
+
+ Args:
+ ctx: The module_ctx or repository_ctx.
+ index_url: str, the PyPI SimpleAPI index URL
+ pkg: str, the distribution to download
+ attr: The attribute that contains necessary info for downloading. The
+ following attributes must be present:
+ * envsubst: The envsubst values for performing substitutions in the URL.
+ * netrc: The netrc parameter for ctx.download, see http_file for docs.
+ * auth_patterns: The auth_patterns parameter for ctx.download, see
+ http_file for docs.
+ cache: A dict for storing the results.
+ get_auth: A function to get auth information. Used in tests.
+ return_absolute: TODO
+ **download_kwargs: Any extra params to ctx.download.
+ Note that output and auth will be passed for you.
+
+ Returns:
+ A similar object to what `download` would return except that in result.out
+ will be the parsed simple api contents.
+ """
+ # NOTE @aignas 2024-03-31: some of the simple APIs use relative URLs for
+ # the whl location and we cannot handle multiple URLs at once by passing
+ # them to ctx.download if we want to correctly handle the relative URLs.
+ # TODO: Add a test that env subbed index urls do not leak into the lock file.
+
+ url = "{}/{}/".format(index_url.rstrip("/"), pkg)
+
+ real_url = strip_empty_path_segments(envsubst(
+ url,
+ attr.envsubst,
+ ctx.getenv if hasattr(ctx, "getenv") else ctx.os.environ.get,
+ ))
+ cache_key = real_url
+
+ cached = cache.get(cache_key)
+ if cached:
+ return struct(success = True, output = cached)
+
+ download = _download_simpleapi(
+ ctx = ctx,
+ url = url,
+ real_url = real_url,
+ attr_envsubst = attr.envsubst,
+ get_auth = get_auth,
+ **download_kwargs
+ )
+
+ return _await(
+ download,
+ _read_index_result,
+ url = real_url,
+ cache = cache,
+ cache_key = cache_key,
+ return_absolute = return_absolute,
+ )
+
+def _read_index_result(*, result, url, cache, cache_key, return_absolute):
+ if not result.success or not result.output:
return struct(success = False)
- content = ctx.read(output)
+ output = parse_simpleapi_html(url = url, content = result.output, return_absolute = return_absolute)
+ if not output:
+ return struct(success = False)
- output = parse_simpleapi_html(url = url, content = content)
- if output:
- cache.setdefault(cache_key, output)
- return struct(success = True, output = output, cache_key = cache_key)
- else:
+ cache.setdefault(cache_key, output)
+ return struct(success = True, output = output)
+
+def _read_simpleapi_with_facts(ctx, index_url, distribution, facts = None, requested_versions = [], **download_kwargs):
+ """Read SimpleAPI or pull data from the known facts in the MODULE.bazel.lock file.
+
+ Args:
+ ctx: The module_ctx or repository_ctx.
+ index_url: TODO
+ distribution: TODO
+ requested_versions: the list of requested versions.
+ facts: Facts to write back if we support facts.
+ **download_kwargs: Params passed to the _read_simpleapi.
+
+ Returns:
+ A similar object to what `download` would return except that in result.out
+ will be the parsed simple api contents. We return only the `requested_versions` so that
+ the list of facts stored in the lock file is limited.
+ """
+ cached = facts.get(index_url, distribution, requested_versions)
+ if cached:
+ return struct(
+ success = True,
+ output = cached,
+ )
+
+ # Fallback to pulling data from memory or downloading from the SimpleAPI
+ download = _read_simpleapi(
+ ctx,
+ index_url = index_url,
+ distribution = distribution,
+ return_absolute = False,
+ **download_kwargs
+ )
+
+ return _await(
+ download,
+ _read_index_result_with_facts,
+ facts = facts,
+ requested_versions = requested_versions,
+ index_url = index_url,
+ )
+
+def _read_index_result_with_facts(*, result, facts, requested_versions, index_url):
+ if not result.success or not result.output:
return struct(success = False)
+
+ output = _filter_packages(result.output, requested_versions)
+ facts.setdefault(index_url, output)
+ return struct(success = True, output = output)
+
+def _filter_packages(dists, requested_versions):
+ sha256s_by_version = {}
+ whls = {}
+ sdists = {}
+ for sha256, d in (dists.sdists | dists.whls).items():
+ if d.version not in requested_versions:
+ continue
+
+ if d.filename.endswith(".whl"):
+ whls[sha256] = d
+ else:
+ sdists[sha256] = d
+
+ sha256s_by_version.setdefault(d.version, []).append(sha256)
+
+ return struct(
+ whls = whls,
+ sdists = sdists,
+ sha256s_by_version = sha256s_by_version,
+ )
+
+def _facts(ctx, store_facts, facts_version = _FACT_VERSION):
+ known_facts = getattr(ctx, "facts", None)
+ if not known_facts:
+ return {}
+
+ return struct(
+ get = lambda index_url, distribution, versions: _get_from_facts(
+ store_facts,
+ known_facts,
+ index_url,
+ distribution,
+ versions,
+ facts_version,
+ ),
+ setdefault = lambda url, value: _store_facts(store_facts, facts_version, url, value),
+ )
+
+def _get_from_facts(facts, known_facts, index_url, distribution, requested_versions, facts_version):
+ if known_facts.get("fact_version") != facts_version:
+ # cannot trust known facts, different version that we know how to parse
+ return None
+
+ known_sources = {}
+
+ known_facts = known_facts.get(index_url, {})
+
+ index_url_for_distro = "{}/{}/".format(index_url, distribution)
+ for url, sha256 in known_facts.get("dist_hashes", {}).items():
+ filename = known_facts.get("dist_filenames", {}).get(sha256)
+ if not filename:
+ _, _, filename = url.rpartition("/")
+
+ version = pkg_version(filename, distribution)
+ if version not in requested_versions:
+ # TODO @aignas 2026-01-21: do the check by requested shas at some point
+ # We don't have sufficient info in the lock file, need to call the API
+ #
+ continue
+
+ if filename.endswith(".whl"):
+ dists = known_sources.setdefault("whls", {})
+ else:
+ dists = known_sources.setdefault("sdists", {})
+
+ known_sources.setdefault("sha256s_by_version", {}).setdefault(version, []).append(sha256)
+
+ dists.setdefault(sha256, struct(
+ sha256 = sha256,
+ filename = filename,
+ version = version,
+ url = absolute_url(index_url = index_url_for_distro, url = url),
+ metadata_sha256 = "",
+ metadata_url = "",
+ yanked = known_facts.get("dist_yanked", {}).get(sha256, False),
+ ))
+
+ if not known_sources:
+ return None
+
+ output = struct(
+ whls = known_sources.get("whls", {}),
+ sdists = known_sources.get("sdists", {}),
+ sha256s_by_version = known_sources.get("sha256s_by_version", {}),
+ )
+ _store_facts(facts, facts_version, index_url, output)
+ return output
+
+def _store_facts(facts, fact_version, index_url, value):
+ """Store values as facts in the lock file.
+
+ The main idea is to ensure that the lock file is small and it is only storing what
+ we would need to fetch from the internet. Any derivative information we can
+ from this that can be achieved using pure Starlark functions should be done in
+ Starlark.
+ """
+
+ facts["fact_version"] = fact_version
+
+ # Store the distributions by index URL that we find them on.
+ facts = facts.setdefault(index_url, {})
+
+ for sha256, d in (value.sdists | value.whls).items():
+ facts.setdefault("dist_hashes", {}).setdefault(d.url, sha256)
+ if not d.url.endswith(d.filename):
+ facts.setdefault("dist_filenames", {}).setdefault(d.url, d.filename)
+ if d.yanked:
+ # TODO @aignas 2026-01-21: store yank reason
+ facts.setdefault("dist_yanked", {}).setdefault(sha256, True)
+
+ return value
From b78cf22beb6ac3c5da6a003c91d2ec9e256bb6f3 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Mon, 2 Feb 2026 22:09:42 +0900
Subject: [PATCH 02/23] fixup
---
python/private/pypi/simpleapi_download.bzl | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index f7aea1a7de..303affca1d 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -264,13 +264,13 @@ def strip_empty_path_segments(url):
else:
return "{}://{}".format(scheme, stripped)
-def _read_simpleapi(ctx, index_url, pkg, attr, cache, get_auth = None, return_absolute = True, **download_kwargs):
+def _read_simpleapi(ctx, index_url, distribution, attr, cache, get_auth = None, return_absolute = True, **download_kwargs):
"""Read SimpleAPI.
Args:
ctx: The module_ctx or repository_ctx.
index_url: str, the PyPI SimpleAPI index URL
- pkg: str, the distribution to download
+ distribution: str, the distribution to download
attr: The attribute that contains necessary info for downloading. The
following attributes must be present:
* envsubst: The envsubst values for performing substitutions in the URL.
@@ -292,7 +292,7 @@ def _read_simpleapi(ctx, index_url, pkg, attr, cache, get_auth = None, return_ab
# them to ctx.download if we want to correctly handle the relative URLs.
# TODO: Add a test that env subbed index urls do not leak into the lock file.
- url = "{}/{}/".format(index_url.rstrip("/"), pkg)
+ url = "{}/{}/".format(index_url.rstrip("/"), distribution)
real_url = strip_empty_path_segments(envsubst(
url,
From 28f3871c6bb1bd68cb9399698140ec6b00abe859 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Tue, 3 Feb 2026 22:54:45 +0900
Subject: [PATCH 03/23] filter packages by requested versions
---
python/private/pypi/simpleapi_download.bzl | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index 303affca1d..4757b83db6 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -264,7 +264,7 @@ def strip_empty_path_segments(url):
else:
return "{}://{}".format(scheme, stripped)
-def _read_simpleapi(ctx, index_url, distribution, attr, cache, get_auth = None, return_absolute = True, **download_kwargs):
+def _read_simpleapi(ctx, index_url, distribution, attr, cache, get_auth = None, return_absolute = True, requested_versions = [], **download_kwargs):
"""Read SimpleAPI.
Args:
@@ -279,6 +279,7 @@ def _read_simpleapi(ctx, index_url, distribution, attr, cache, get_auth = None,
http_file for docs.
cache: A dict for storing the results.
get_auth: A function to get auth information. Used in tests.
+ requested_versions: the list of requested versions.
return_absolute: TODO
**download_kwargs: Any extra params to ctx.download.
Note that output and auth will be passed for you.
@@ -303,6 +304,7 @@ def _read_simpleapi(ctx, index_url, distribution, attr, cache, get_auth = None,
cached = cache.get(cache_key)
if cached:
+ cached = _filter_packages(cached, requested_versions)
return struct(success = True, output = cached)
download = _download_simpleapi(
@@ -321,9 +323,10 @@ def _read_simpleapi(ctx, index_url, distribution, attr, cache, get_auth = None,
cache = cache,
cache_key = cache_key,
return_absolute = return_absolute,
+ requested_versions = requested_versions,
)
-def _read_index_result(*, result, url, cache, cache_key, return_absolute):
+def _read_index_result(*, result, url, cache, cache_key, return_absolute, requested_versions):
if not result.success or not result.output:
return struct(success = False)
@@ -332,6 +335,7 @@ def _read_index_result(*, result, url, cache, cache_key, return_absolute):
return struct(success = False)
cache.setdefault(cache_key, output)
+ output = _filter_packages(output, requested_versions)
return struct(success = True, output = output)
def _read_simpleapi_with_facts(ctx, index_url, distribution, facts = None, requested_versions = [], **download_kwargs):
From fd13f9082f95aa69c0f283f659ee8950781099ee Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Thu, 5 Feb 2026 21:38:49 +0900
Subject: [PATCH 04/23] fix unit tests
---
tests/pypi/hub_builder/hub_builder_tests.bzl | 8 +++++++-
.../simpleapi_download/simpleapi_download_tests.bzl | 10 ++++++++--
2 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/tests/pypi/hub_builder/hub_builder_tests.bzl b/tests/pypi/hub_builder/hub_builder_tests.bzl
index 03cefd13c5..f73e23ba37 100644
--- a/tests/pypi/hub_builder/hub_builder_tests.bzl
+++ b/tests/pypi/hub_builder/hub_builder_tests.bzl
@@ -1036,7 +1036,13 @@ git_dep @ git+https://git.server/repo/project@deadbeefdeadbeef
index_url = "pypi.org",
index_url_overrides = {},
netrc = None,
- sources = ["simple", "plat_pkg", "pip_fallback", "some_other_pkg"],
+ facts = None,
+ sources = {
+ "pip_fallback": ["0.0.1"],
+ "plat_pkg": ["0.0.4"],
+ "simple": ["0.0.1"],
+ "some_other_pkg": ["0.0.1"],
+ },
),
"cache": {},
"parallel_download": False,
diff --git a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
index 8dc307235a..e8013c3ab7 100644
--- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
+++ b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
@@ -22,12 +22,15 @@ _tests = []
def _test_simple(env):
calls = []
- def read_simpleapi(ctx, url, attr, cache, get_auth, block):
+ def read_simpleapi(ctx, index_url, distribution, attr, cache, get_auth, requested_versions, block):
_ = ctx # buildifier: disable=unused-variable
+ _ = distribution
+ _ = requested_versions
_ = attr
_ = cache
_ = get_auth
env.expect.that_bool(block).equals(False)
+ url = "{}/{}/".format(index_url, distribution)
calls.append(url)
if "foo" in url and "main" in url:
return struct(
@@ -75,11 +78,14 @@ def _test_fail(env):
calls = []
fails = []
- def read_simpleapi(ctx, url, attr, cache, get_auth, block):
+ def read_simpleapi(ctx, index_url, distribution, attr, cache, get_auth, requested_versions, block):
_ = ctx # buildifier: disable=unused-variable
+ _ = distribution
+ _ = requested_versions
_ = attr
_ = cache
_ = get_auth
+ url = "{}/{}/".format(index_url, distribution)
env.expect.that_bool(block).equals(False)
calls.append(url)
if "foo" in url:
From a11735fe7596770b1c63e0015ff274190cfba356 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Thu, 5 Feb 2026 23:40:17 +0900
Subject: [PATCH 05/23] fix requested versions
---
python/private/pypi/simpleapi_download.bzl | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index 4757b83db6..9b6f0844b4 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -116,7 +116,7 @@ def simpleapi_download(
index_url = index_url_overrides.get(pkg_normalized, index_url),
distribution = pkg,
get_auth = get_auth,
- requested_versions = versions,
+ requested_versions = {v: None for v in versions},
**download_kwargs
)
if hasattr(result, "wait"):
@@ -264,7 +264,7 @@ def strip_empty_path_segments(url):
else:
return "{}://{}".format(scheme, stripped)
-def _read_simpleapi(ctx, index_url, distribution, attr, cache, get_auth = None, return_absolute = True, requested_versions = [], **download_kwargs):
+def _read_simpleapi(ctx, index_url, distribution, attr, cache, requested_versions, get_auth = None, return_absolute = True, **download_kwargs):
"""Read SimpleAPI.
Args:
@@ -366,6 +366,7 @@ def _read_simpleapi_with_facts(ctx, index_url, distribution, facts = None, reque
ctx,
index_url = index_url,
distribution = distribution,
+ requested_versions = requested_versions,
return_absolute = False,
**download_kwargs
)
From db1e48087d87e8e74fd33e38f3c2fb31aac5cc06 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Sat, 7 Feb 2026 12:55:56 +0900
Subject: [PATCH 06/23] remove a bool toggle
---
python/private/pypi/simpleapi_download.bzl | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index 9b6f0844b4..18065b5adb 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -31,7 +31,6 @@ def simpleapi_download(
attr,
cache,
parallel_download = True,
- store_facts = True,
read_simpleapi = None,
get_auth = None,
_fail = fail):
@@ -62,9 +61,6 @@ def simpleapi_download(
reflected when re-evaluating the extension unless we do
`bazel clean --expunge`.
parallel_download: A boolean to enable usage of bazel 7.1 non-blocking downloads.
- store_facts: A boolean to enable usage of bazel 8.5 facts feature to store data in
- MODULE.bazel.lock in between the runs. Can be disabled if urls contain sensitive
- data and users would rather opt-out of the feature.
read_simpleapi: a function for reading and parsing of the SimpleAPI contents.
Used in tests.
get_auth: A function to get auth information passed to read_simpleapi. Used in tests.
@@ -87,7 +83,7 @@ def simpleapi_download(
contents = {}
index_urls = [attr.index_url] + attr.extra_index_urls
read_simpleapi = read_simpleapi or _read_simpleapi
- if hasattr(ctx, "facts") and store_facts:
+ if hasattr(ctx, "facts"):
download_kwargs["facts"] = _facts(ctx, attr.facts)
read_simpleapi = _read_simpleapi_with_facts
ctx.report_progress("Fetch package lists from PyPI index or read from MODULE.bazel.lock")
From 5d3f8bff221fdf3cb5976e233cd5c38f64fdd123 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Sat, 7 Feb 2026 13:16:06 +0900
Subject: [PATCH 07/23] wip
---
python/private/pypi/simpleapi_download.bzl | 131 +++++++++++----------
1 file changed, 66 insertions(+), 65 deletions(-)
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index 18065b5adb..2e598fbbfe 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -83,18 +83,16 @@ def simpleapi_download(
contents = {}
index_urls = [attr.index_url] + attr.extra_index_urls
read_simpleapi = read_simpleapi or _read_simpleapi
- if hasattr(ctx, "facts"):
- download_kwargs["facts"] = _facts(ctx, attr.facts)
- read_simpleapi = _read_simpleapi_with_facts
- ctx.report_progress("Fetch package lists from PyPI index or read from MODULE.bazel.lock")
- else:
- ctx.report_progress("Fetch package lists from PyPI index")
+ cache = _cache(ctx, cache, attr.facts)
found_on_index = {}
warn_overrides = False
# Normalize the inputs
- input_sources = {pkg: [] for pkg in attr.sources} if type(attr.sources) == "list" else attr.sources
+ if type(attr.sources) == "list":
+ fail("TODO")
+ else:
+ input_sources = attr.sources
for i, index_url in enumerate(index_urls):
if i != 0:
@@ -284,24 +282,24 @@ def _read_simpleapi(ctx, index_url, distribution, attr, cache, requested_version
A similar object to what `download` would return except that in result.out
will be the parsed simple api contents.
"""
+
+ index_url = index_url.rstrip("/")
+
# NOTE @aignas 2024-03-31: some of the simple APIs use relative URLs for
# the whl location and we cannot handle multiple URLs at once by passing
# them to ctx.download if we want to correctly handle the relative URLs.
# TODO: Add a test that env subbed index urls do not leak into the lock file.
- url = "{}/{}/".format(index_url.rstrip("/"), distribution)
+ cached = cache.get(index_url, distribution, requested_versions)
+ if cached:
+ return struct(success = True, output = cached)
+ url = "{}/{}/".format(index_url, distribution)
real_url = strip_empty_path_segments(envsubst(
url,
attr.envsubst,
ctx.getenv if hasattr(ctx, "getenv") else ctx.os.environ.get,
))
- cache_key = real_url
-
- cached = cache.get(cache_key)
- if cached:
- cached = _filter_packages(cached, requested_versions)
- return struct(success = True, output = cached)
download = _download_simpleapi(
ctx = ctx,
@@ -315,75 +313,78 @@ def _read_simpleapi(ctx, index_url, distribution, attr, cache, requested_version
return _await(
download,
_read_index_result,
- url = real_url,
+ index_url = index_url,
+ distribution = distribution,
+ real_url = real_url,
cache = cache,
- cache_key = cache_key,
return_absolute = return_absolute,
requested_versions = requested_versions,
)
-def _read_index_result(*, result, url, cache, cache_key, return_absolute, requested_versions):
+def _read_index_result(*, result, index_url, distribution, real_url, cache, return_absolute, requested_versions):
if not result.success or not result.output:
return struct(success = False)
- output = parse_simpleapi_html(url = url, content = result.output, return_absolute = return_absolute)
+ output = parse_simpleapi_html(url = real_url, content = result.output, return_absolute = return_absolute)
if not output:
return struct(success = False)
- cache.setdefault(cache_key, output)
- output = _filter_packages(output, requested_versions)
+ cache.setdefault(index_url, distribution, requested_versions, output)
return struct(success = True, output = output)
-def _read_simpleapi_with_facts(ctx, index_url, distribution, facts = None, requested_versions = [], **download_kwargs):
- """Read SimpleAPI or pull data from the known facts in the MODULE.bazel.lock file.
+def _cache(ctx, cache, facts):
+ if hasattr(ctx, "facts"):
+ facts = _facts(ctx, facts)
+ ctx.report_progress("Fetch package lists from PyPI index or read from MODULE.bazel.lock")
+ else:
+ ctx.report_progress("Fetch package lists from PyPI index")
- Args:
- ctx: The module_ctx or repository_ctx.
- index_url: TODO
- distribution: TODO
- requested_versions: the list of requested versions.
- facts: Facts to write back if we support facts.
- **download_kwargs: Params passed to the _read_simpleapi.
+ return struct(
+ get = lambda index_url, distribution, versions: _cache_get(
+ cache,
+ facts,
+ index_url,
+ distribution,
+ versions,
+ ),
+ setdefault = lambda index_url, distribution, versions, value: _cache_setdefault(
+ cache,
+ facts,
+ index_url,
+ distribution,
+ versions,
+ value,
+ ),
+ )
- Returns:
- A similar object to what `download` would return except that in result.out
- will be the parsed simple api contents. We return only the `requested_versions` so that
- the list of facts stored in the lock file is limited.
- """
- cached = facts.get(index_url, distribution, requested_versions)
- if cached:
- return struct(
- success = True,
- output = cached,
- )
+def _cache_get(cache, facts, index_url, distribution, versions):
+ if facts:
+ cached = facts.get(index_url, distribution, versions)
+ if cached:
+ return cached
- # Fallback to pulling data from memory or downloading from the SimpleAPI
- download = _read_simpleapi(
- ctx,
- index_url = index_url,
- distribution = distribution,
- requested_versions = requested_versions,
- return_absolute = False,
- **download_kwargs
- )
+ cached = cache.get((index_url, distribution))
+ if not cached:
+ return None
- return _await(
- download,
- _read_index_result_with_facts,
- facts = facts,
- requested_versions = requested_versions,
- index_url = index_url,
- )
+ cached = _filter_packages(cached, versions)
+ if facts:
+ # Ensure that we write back to the facts, this happens if we request versions that
+ # we don't have facts for but we have in-memory cache of SimpleAPI query results
+ facts.setdefault(index_url, distribution, versions, cached)
+ return cached
-def _read_index_result_with_facts(*, result, facts, requested_versions, index_url):
- if not result.success or not result.output:
- return struct(success = False)
+def _cache_setdefault(cache, facts, index_url, distribution, versions, value):
+ cache.setdefault((index_url, distribution), value)
+ value = _filter_packages(value, versions)
- output = _filter_packages(result.output, requested_versions)
- facts.setdefault(index_url, output)
- return struct(success = True, output = output)
+ if facts:
+ facts.setdefault(index_url, distribution, value)
def _filter_packages(dists, requested_versions):
+ if dists == None:
+ return None
+
sha256s_by_version = {}
whls = {}
sdists = {}
@@ -404,21 +405,21 @@ def _filter_packages(dists, requested_versions):
sha256s_by_version = sha256s_by_version,
)
-def _facts(ctx, store_facts, facts_version = _FACT_VERSION):
+def _facts(ctx, facts, facts_version = _FACT_VERSION):
known_facts = getattr(ctx, "facts", None)
if not known_facts:
return {}
return struct(
get = lambda index_url, distribution, versions: _get_from_facts(
- store_facts,
+ facts,
known_facts,
index_url,
distribution,
versions,
facts_version,
),
- setdefault = lambda url, value: _store_facts(store_facts, facts_version, url, value),
+ setdefault = lambda url, distribution, value: _store_facts(facts, facts_version, url, value),
)
def _get_from_facts(facts, known_facts, index_url, distribution, requested_versions, facts_version):
From a84d8cb4e720b2a0531a164a6e03b8bf2c6e553d Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Sat, 7 Feb 2026 13:44:11 +0900
Subject: [PATCH 08/23] wip
---
python/private/pypi/simpleapi_download.bzl | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index 2e598fbbfe..1d660b7c90 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -371,7 +371,7 @@ def _cache_get(cache, facts, index_url, distribution, versions):
if facts:
# Ensure that we write back to the facts, this happens if we request versions that
# we don't have facts for but we have in-memory cache of SimpleAPI query results
- facts.setdefault(index_url, distribution, versions, cached)
+ facts.setdefault(index_url, distribution, cached)
return cached
def _cache_setdefault(cache, facts, index_url, distribution, versions, value):
From 4fb0d0ea0166a3cfd65a880a16867bb8caa8e808 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Sun, 8 Feb 2026 01:06:02 +0900
Subject: [PATCH 09/23] wip
---
python/private/pypi/simpleapi_download.bzl | 54 +++++--
.../simpleapi_download_tests.bzl | 137 +++++++++++++++++-
2 files changed, 169 insertions(+), 22 deletions(-)
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index 1d660b7c90..837a470e8e 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -83,7 +83,13 @@ def simpleapi_download(
contents = {}
index_urls = [attr.index_url] + attr.extra_index_urls
read_simpleapi = read_simpleapi or _read_simpleapi
- cache = _cache(ctx, cache, attr.facts)
+
+ if attr.facts:
+ ctx.report_progress("Fetch package lists from PyPI index or read from MODULE.bazel.lock")
+ else:
+ ctx.report_progress("Fetch package lists from PyPI index")
+
+ cache = simpleapi_cache(cache, getattr(ctx, "facts", None), attr.facts)
found_on_index = {}
warn_overrides = False
@@ -332,12 +338,21 @@ def _read_index_result(*, result, index_url, distribution, real_url, cache, retu
cache.setdefault(index_url, distribution, requested_versions, output)
return struct(success = True, output = output)
-def _cache(ctx, cache, facts):
- if hasattr(ctx, "facts"):
- facts = _facts(ctx, facts)
- ctx.report_progress("Fetch package lists from PyPI index or read from MODULE.bazel.lock")
- else:
- ctx.report_progress("Fetch package lists from PyPI index")
+def simpleapi_cache(cache = None, known_facts = None, facts = None):
+ """SimpleAPI cache for making fewer calls.
+
+ Args:
+ cache: the storage to store things in memory.
+ known_facts: the storage to retrieve known facts.
+ facts: the storage to store things in the lock file after we evaluate the extension.
+
+ Returns:
+ struct with 2 methods, `get` and `setdefault`.
+ """
+ if cache == None:
+ cache = {}
+ if facts != None:
+ facts = _facts(known_facts, facts)
return struct(
get = lambda index_url, distribution, versions: _cache_get(
@@ -388,27 +403,32 @@ def _filter_packages(dists, requested_versions):
sha256s_by_version = {}
whls = {}
sdists = {}
- for sha256, d in (dists.sdists | dists.whls).items():
+ for sha256, d in dists.sdists.items():
if d.version not in requested_versions:
continue
- if d.filename.endswith(".whl"):
- whls[sha256] = d
- else:
- sdists[sha256] = d
+ sdists[sha256] = d
+ sha256s_by_version.setdefault(d.version, []).append(sha256)
+
+ for sha256, d in dists.whls.items():
+ if d.version not in requested_versions:
+ continue
+ whls[sha256] = d
sha256s_by_version.setdefault(d.version, []).append(sha256)
+ if not whls and not sdists:
+ return None
+
return struct(
whls = whls,
sdists = sdists,
sha256s_by_version = sha256s_by_version,
)
-def _facts(ctx, facts, facts_version = _FACT_VERSION):
- known_facts = getattr(ctx, "facts", None)
- if not known_facts:
- return {}
+def _facts(known_facts, facts, facts_version = _FACT_VERSION):
+ if known_facts == None:
+ return None
return struct(
get = lambda index_url, distribution, versions: _get_from_facts(
@@ -480,6 +500,8 @@ def _store_facts(facts, fact_version, index_url, value):
from this that can be achieved using pure Starlark functions should be done in
Starlark.
"""
+ if not value:
+ return value
facts["fact_version"] = fact_version
diff --git a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
index e8013c3ab7..0da3a1e16a 100644
--- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
+++ b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
@@ -15,7 +15,13 @@
""
load("@rules_testing//lib:test_suite.bzl", "test_suite")
-load("//python/private/pypi:simpleapi_download.bzl", "simpleapi_download", "strip_empty_path_segments") # buildifier: disable=bzl-visibility
+load("@rules_testing//lib:truth.bzl", "subjects")
+load(
+ "//python/private/pypi:simpleapi_download.bzl",
+ "simpleapi_cache",
+ "simpleapi_download",
+ "strip_empty_path_segments",
+) # buildifier: disable=bzl-visibility
_tests = []
@@ -52,8 +58,9 @@ def _test_simple(env):
index_url_overrides = {},
index_url = "main",
extra_index_urls = ["extra"],
- sources = ["foo", "bar", "baz"],
+ sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
envsubst = [],
+ facts = None,
),
cache = {},
parallel_download = True,
@@ -115,8 +122,9 @@ def _test_fail(env):
},
index_url = "main",
extra_index_urls = ["extra"],
- sources = ["foo", "bar", "baz"],
+ sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
envsubst = [],
+ facts = None,
),
cache = {},
parallel_download = True,
@@ -168,8 +176,9 @@ def _test_download_url(env):
index_url_overrides = {},
index_url = "https://example.com/main/simple/",
extra_index_urls = [],
- sources = ["foo", "bar", "baz"],
+ sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
envsubst = [],
+ facts = None,
),
cache = {},
parallel_download = False,
@@ -204,8 +213,9 @@ def _test_download_url_parallel(env):
index_url_overrides = {},
index_url = "https://example.com/main/simple/",
extra_index_urls = [],
- sources = ["foo", "bar", "baz"],
+ sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
envsubst = [],
+ facts = None,
),
cache = {},
parallel_download = True,
@@ -240,8 +250,9 @@ def _test_download_envsubst_url(env):
index_url_overrides = {},
index_url = "$INDEX_URL",
extra_index_urls = [],
- sources = ["foo", "bar", "baz"],
+ sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
envsubst = ["INDEX_URL"],
+ facts = None,
),
cache = {},
parallel_download = False,
@@ -266,6 +277,120 @@ def _test_strip_empty_path_segments(env):
_tests.append(_test_strip_empty_path_segments)
+def _expect_cache_result(env, cache, key, sdists, whls):
+ got = env.expect.that_struct(
+ cache.get(*key),
+ attrs = dict(
+ whls = subjects.dict,
+ sdists = subjects.dict,
+ ),
+ )
+ got.whls().contains_exactly(whls)
+ got.sdists().contains_exactly(sdists)
+
+def _test_cache_without_facts(env):
+ cache = simpleapi_cache()
+
+ env.expect.that_str(cache.get("index_url", "distro", ["1.0"])).equals(None)
+ cache.setdefault("index_url", "distro", ["1.0"], struct(
+ sdists = {
+ "a": struct(version = "1.0"),
+ },
+ whls = {
+ "b": struct(version = "1.0"),
+ "c": struct(version = "1.1"),
+ "d": struct(version = "1.1"),
+ "e": struct(version = "1.0"),
+ },
+ ))
+ _expect_cache_result(
+ env,
+ cache,
+ key = ("index_url", "distro", ["1.0"]),
+ sdists = {"a": struct(version = "1.0")},
+ whls = {"b": struct(version = "1.0"), "e": struct(version = "1.0")},
+ )
+ env.expect.that_str(cache.get("index_url", "distro", ["1.2"])).equals(None)
+
+ _expect_cache_result(
+ env,
+ cache,
+ key = ("index_url", "distro", ["1.1"]),
+ sdists = {},
+ whls = {"c": struct(version = "1.1"), "d": struct(version = "1.1")},
+ )
+
+_tests.append(_test_cache_without_facts)
+
+def _test_cache_with_facts(env):
+ facts = {}
+ cache = simpleapi_cache(
+ cache = {},
+ known_facts = {},
+ facts = facts,
+ )
+
+ env.expect.that_str(cache.get("index_url", "distro", ["1.0"])).equals(None)
+ cache.setdefault("index_url", "distro", ["1.0"], struct(
+ sdists = {
+ "a": struct(version = "1.0", url = "url//a.tgz", filename = "a.tgz", yanked = False),
+ },
+ whls = {
+ "b": struct(version = "1.0", url = "url//b.whl", filename = "b.whl", yanked = False),
+ "c": struct(version = "1.1", url = "url//c.whl", filename = "c.whl", yanked = False),
+ "d": struct(version = "1.1", url = "url//d.whl", filename = "d.whl", yanked = False),
+ "e": struct(version = "1.0", url = "url//e.whl", filename = "e.whl", yanked = False),
+ },
+ ))
+ _expect_cache_result(
+ env,
+ cache,
+ key = ("index_url", "distro", ["1.0"]),
+ sdists = {
+ "a": struct(version = "1.0", url = "url//a.tgz", filename = "a.tgz", yanked = False),
+ },
+ whls = {
+ "b": struct(version = "1.0", url = "url//b.whl", filename = "b.whl", yanked = False),
+ "e": struct(version = "1.0", url = "url//e.whl", filename = "e.whl", yanked = False),
+ },
+ )
+ env.expect.that_str(cache.get("index_url", "distro", ["1.2"])).equals(None)
+ env.expect.that_dict(facts).contains_exactly({
+ "fact_version": "v1",
+ "index_url": {
+ "dist_hashes": {
+ "url//a.tgz": "a",
+ "url//b.whl": "b",
+ "url//e.whl": "e",
+ },
+ },
+ })
+
+ _expect_cache_result(
+ env,
+ cache,
+ key = ("index_url", "distro", ["1.1"]),
+ sdists = {},
+ whls = {
+ "c": struct(version = "1.1", url = "url//c.whl", filename = "c.whl", yanked = False),
+ "d": struct(version = "1.1", url = "url//d.whl", filename = "d.whl", yanked = False),
+ },
+ )
+ env.expect.that_dict(facts).contains_exactly({
+ "fact_version": "v1",
+ "index_url": {
+ "dist_hashes": {
+ "url//a.tgz": "a",
+ "url//b.whl": "b",
+ "url//c.whl": "c",
+ "url//d.whl": "d",
+ "url//e.whl": "e",
+ },
+ },
+ })
+
+_tests.append(_test_cache_with_facts)
+
def simpleapi_download_test_suite(name):
"""Create the test suite.
From ec50bb5d2c25c853184d94969ae40b6e8149c1f6 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Sun, 8 Feb 2026 01:39:36 +0900
Subject: [PATCH 10/23] wip
---
python/private/pypi/simpleapi_download.bzl | 44 +++++++---
.../simpleapi_download_tests.bzl | 81 ++++++++++++++-----
2 files changed, 93 insertions(+), 32 deletions(-)
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index 837a470e8e..d545ebff87 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -264,7 +264,7 @@ def strip_empty_path_segments(url):
else:
return "{}://{}".format(scheme, stripped)
-def _read_simpleapi(ctx, index_url, distribution, attr, cache, requested_versions, get_auth = None, return_absolute = True, **download_kwargs):
+def _read_simpleapi(ctx, index_url, distribution, attr, cache, requested_versions, get_auth = None, **download_kwargs):
"""Read SimpleAPI.
Args:
@@ -280,7 +280,6 @@ def _read_simpleapi(ctx, index_url, distribution, attr, cache, requested_version
cache: A dict for storing the results.
get_auth: A function to get auth information. Used in tests.
requested_versions: the list of requested versions.
- return_absolute: TODO
**download_kwargs: Any extra params to ctx.download.
Note that output and auth will be passed for you.
@@ -323,15 +322,15 @@ def _read_simpleapi(ctx, index_url, distribution, attr, cache, requested_version
distribution = distribution,
real_url = real_url,
cache = cache,
- return_absolute = return_absolute,
requested_versions = requested_versions,
)
-def _read_index_result(*, result, index_url, distribution, real_url, cache, return_absolute, requested_versions):
+def _read_index_result(*, result, index_url, distribution, real_url, cache, requested_versions):
if not result.success or not result.output:
return struct(success = False)
- output = parse_simpleapi_html(url = real_url, content = result.output, return_absolute = return_absolute)
+ # TODO @aignas 2026-02-08: make this the only behaviour, maybe can get rid of `real_url
+ output = parse_simpleapi_html(url = real_url, content = result.output, return_absolute = False)
if not output:
return struct(success = False)
@@ -382,7 +381,7 @@ def _cache_get(cache, facts, index_url, distribution, versions):
if not cached:
return None
- cached = _filter_packages(cached, versions)
+ cached = _filter_packages(cached, versions, index_url, distribution)
if facts:
# Ensure that we write back to the facts, this happens if we request versions that
# we don't have facts for but we have in-memory cache of SimpleAPI query results
@@ -391,12 +390,12 @@ def _cache_get(cache, facts, index_url, distribution, versions):
def _cache_setdefault(cache, facts, index_url, distribution, versions, value):
cache.setdefault((index_url, distribution), value)
- value = _filter_packages(value, versions)
+ value = _filter_packages(value, versions, index_url, distribution)
if facts:
facts.setdefault(index_url, distribution, value)
-def _filter_packages(dists, requested_versions):
+def _filter_packages(dists, requested_versions, index_url, distribution):
if dists == None:
return None
@@ -407,14 +406,14 @@ def _filter_packages(dists, requested_versions):
if d.version not in requested_versions:
continue
- sdists[sha256] = d
+ sdists[sha256] = _with_absolute_url(d, index_url, distribution)
sha256s_by_version.setdefault(d.version, []).append(sha256)
for sha256, d in dists.whls.items():
if d.version not in requested_versions:
continue
- whls[sha256] = d
+ whls[sha256] = _with_absolute_url(d, index_url, distribution)
sha256s_by_version.setdefault(d.version, []).append(sha256)
if not whls and not sdists:
@@ -440,6 +439,8 @@ def _facts(known_facts, facts, facts_version = _FACT_VERSION):
facts_version,
),
setdefault = lambda url, distribution, value: _store_facts(facts, facts_version, url, value),
+ known_facts = known_facts,
+ facts = facts,
)
def _get_from_facts(facts, known_facts, index_url, distribution, requested_versions, facts_version):
@@ -476,8 +477,6 @@ def _get_from_facts(facts, known_facts, index_url, distribution, requested_versi
filename = filename,
version = version,
url = absolute_url(index_url = index_url_for_distro, url = url),
- metadata_sha256 = "",
- metadata_url = "",
yanked = known_facts.get("dist_yanked", {}).get(sha256, False),
))
@@ -492,6 +491,27 @@ def _get_from_facts(facts, known_facts, index_url, distribution, requested_versi
_store_facts(facts, facts_version, index_url, output)
return output
+def _with_absolute_url(d, index_url, distribution):
+ index_url_for_distro = "{}/{}/".format(index_url.rstrip("/"), distribution)
+
+ # TODO @aignas 2026-02-08: think of a better way to do this
+ kwargs = dict()
+ for attr in [
+ "sha256",
+ "filename",
+ "version",
+ "metadata_sha256",
+ "metadata_url",
+ "yanked",
+ "url",
+ ]:
+ if hasattr(d, attr):
+ kwargs[attr] = getattr(d, attr)
+ if attr == "url":
+ kwargs[attr] = absolute_url(index_url = index_url_for_distro, url = kwargs[attr])
+
+ return struct(**kwargs)
+
def _store_facts(facts, fact_version, index_url, value):
"""Store values as facts in the lock file.
diff --git a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
index 0da3a1e16a..0286916e8a 100644
--- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
+++ b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
@@ -324,22 +324,38 @@ _tests.append(_test_cache_without_facts)
def _test_cache_with_facts(env):
facts = {}
+ cache_storage = {}
+ known_facts = {}
cache = simpleapi_cache(
- cache = {},
- known_facts = {},
+ cache = cache_storage,
+ known_facts = known_facts,
facts = facts,
)
+ sdists = struct(
+ a = struct(version = "1.0", url = "https://index_url/distro/a-1.0.tgz", filename = "a-1.0.tgz", yanked = False, sha256 = "a"),
+ )
+
+ # TODO @aignas 2026-02-08: url is relative
+ # TODO @aignas 2026-02-08: url starts with `/`
+ # TODO @aignas 2026-02-08: url is absolute
+ whls = struct(
+ b = struct(version = "1.0", url = "https://index_url/distro/b-1.0-tail.whl", filename = "b-1.0-tail.whl", yanked = False, sha256 = "b"),
+ c = struct(version = "1.1", url = "https://index_url/distro/c-1.1-tail.whl", filename = "c-1.1-tail.whl", yanked = False, sha256 = "c"),
+ d = struct(version = "1.1", url = "https://index_url/distro/d-1.1-tail.whl", filename = "d-1.1-tail.whl", yanked = False, sha256 = "d"),
+ e = struct(version = "1.0", url = "https://index_url/distro/e-1.0-tail.whl", filename = "e-1.0-tail.whl", yanked = False, sha256 = "e"),
+ )
+
env.expect.that_str(cache.get("index_url", "distro", ["1.0"])).equals(None)
cache.setdefault("index_url", "distro", ["1.0"], struct(
sdists = {
- "a": struct(version = "1.0", url = "url//a.tgz", filename = "a.tgz", yanked = False),
+ "a": sdists.a,
},
whls = {
- "b": struct(version = "1.0", url = "url//b.whl", filename = "b.whl", yanked = False),
- "c": struct(version = "1.1", url = "url//c.whl", filename = "c.whl", yanked = False),
- "d": struct(version = "1.1", url = "url//d.whl", filename = "d.whl", yanked = False),
- "e": struct(version = "1.0", url = "url//e.whl", filename = "e.whl", yanked = False),
+ "b": whls.b,
+ "c": whls.c,
+ "d": whls.d,
+ "e": whls.e,
},
))
_expect_cache_result(
@@ -347,11 +363,11 @@ def _test_cache_with_facts(env):
cache,
key = ("index_url", "distro", ["1.0"]),
sdists = {
- "a": struct(version = "1.0", url = "url//a.tgz", filename = "a.tgz", yanked = False),
+ "a": sdists.a,
},
whls = {
- "b": struct(version = "1.0", url = "url//b.whl", filename = "b.whl", yanked = False),
- "e": struct(version = "1.0", url = "url//e.whl", filename = "e.whl", yanked = False),
+ "b": whls.b,
+ "e": whls.e,
},
)
env.expect.that_str(cache.get("index_url", "distro", ["1.2"])).equals(None)
@@ -359,32 +375,57 @@ def _test_cache_with_facts(env):
"fact_version": "v1",
"index_url": {
"dist_hashes": {
- "url//a.tgz": "a",
- "url//b.whl": "b",
- "url//e.whl": "e",
+ sdists.a.url: "a",
+ whls.b.url: "b",
+ whls.e.url: "e",
+ },
+ },
+ })
+
+ _expect_cache_result(
+ env,
+ cache,
+ key = ("index_url", "distro", ["1.1"]),
+ sdists = {},
+ whls = {
+ "c": whls.c,
+ "d": whls.d,
+ },
+ )
+ env.expect.that_dict(facts).contains_exactly({
+ "fact_version": "v1",
+ "index_url": {
+ "dist_hashes": {
+ sdists.a.url: "a",
+ whls.b.url: "b",
+ whls.c.url: "c",
+ whls.d.url: "d",
+ whls.e.url: "e",
},
},
})
+ # simulate write to MODULE.bazel.lock
+ known_facts.update(facts)
+ facts.clear()
+ cache_storage.clear()
+
_expect_cache_result(
env,
cache,
key = ("index_url", "distro", ["1.1"]),
sdists = {},
whls = {
- "c": struct(version = "1.1", url = "url//c.whl", filename = "c.whl", yanked = False),
- "d": struct(version = "1.1", url = "url//d.whl", filename = "d.whl", yanked = False),
+ "c": whls.c,
+ "d": whls.d,
},
)
env.expect.that_dict(facts).contains_exactly({
"fact_version": "v1",
"index_url": {
"dist_hashes": {
- "url//a.tgz": "a",
- "url//b.whl": "b",
- "url//c.whl": "c",
- "url//d.whl": "d",
- "url//e.whl": "e",
+ whls.c.url: "c",
+ whls.d.url: "d",
},
},
})
From 97b8be1f461de6182c2cbc7deb1d4cf3647976f8 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Sun, 8 Feb 2026 23:56:14 +0900
Subject: [PATCH 11/23] finish the in memory
---
python/private/pypi/simpleapi_download.bzl | 116 ++-
.../simpleapi_download_tests.bzl | 659 ++++++++----------
2 files changed, 373 insertions(+), 402 deletions(-)
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index d545ebff87..25ff73469b 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -89,7 +89,10 @@ def simpleapi_download(
else:
ctx.report_progress("Fetch package lists from PyPI index")
- cache = simpleapi_cache(cache, getattr(ctx, "facts", None), attr.facts)
+ cache = simpleapi_cache(
+ memory_cache = memory_cache(cache),
+ facts_cache = facts_cache(getattr(ctx, "facts", None), attr.facts),
+ )
found_on_index = {}
warn_overrides = False
@@ -337,33 +340,27 @@ def _read_index_result(*, result, index_url, distribution, real_url, cache, requ
cache.setdefault(index_url, distribution, requested_versions, output)
return struct(success = True, output = output)
-def simpleapi_cache(cache = None, known_facts = None, facts = None):
+def simpleapi_cache(memory_cache, facts_cache):
"""SimpleAPI cache for making fewer calls.
Args:
- cache: the storage to store things in memory.
- known_facts: the storage to retrieve known facts.
- facts: the storage to store things in the lock file after we evaluate the extension.
+ memory_cache: the storage to store things in memory.
+ facts_cache: the storage to retrieve known facts.
Returns:
struct with 2 methods, `get` and `setdefault`.
"""
- if cache == None:
- cache = {}
- if facts != None:
- facts = _facts(known_facts, facts)
-
return struct(
get = lambda index_url, distribution, versions: _cache_get(
- cache,
- facts,
+ memory_cache,
+ facts_cache,
index_url,
distribution,
versions,
),
setdefault = lambda index_url, distribution, versions, value: _cache_setdefault(
- cache,
- facts,
+ memory_cache,
+ facts_cache,
index_url,
distribution,
versions,
@@ -372,20 +369,20 @@ def simpleapi_cache(cache = None, known_facts = None, facts = None):
)
def _cache_get(cache, facts, index_url, distribution, versions):
- if facts:
- cached = facts.get(index_url, distribution, versions)
- if cached:
- return cached
+ if not facts:
+ return cache.get(index_url, distribution, versions)
- cached = cache.get((index_url, distribution))
+ cached = facts.get(index_url, distribution, versions)
+ if cached:
+ return cached
+
+ cached = cache.get(index_url, distribution, versions)
if not cached:
return None
- cached = _filter_packages(cached, versions, index_url, distribution)
- if facts:
- # Ensure that we write back to the facts, this happens if we request versions that
- # we don't have facts for but we have in-memory cache of SimpleAPI query results
- facts.setdefault(index_url, distribution, cached)
+ # Ensure that we write back to the facts, this happens if we request versions that
+ # we don't have facts for but we have in-memory cache of SimpleAPI query results
+ facts.setdefault(index_url, distribution, cached)
return cached
def _cache_setdefault(cache, facts, index_url, distribution, versions, value):
@@ -395,10 +392,78 @@ def _cache_setdefault(cache, facts, index_url, distribution, versions, value):
if facts:
facts.setdefault(index_url, distribution, value)
+def memory_cache(cache = None):
+ """SimpleAPI cache for making fewer calls.
+
+ Args:
+ cache: the storage to store things in memory.
+
+ Returns:
+ struct with 2 methods, `get` and `setdefault`.
+ """
+ if cache == None:
+ cache = {}
+
+ return struct(
+ get = lambda index_url, distribution, versions: _memcache_get(
+ cache,
+ index_url,
+ distribution,
+ versions,
+ ),
+ setdefault = lambda index_url, distribution, versions, value: _memcache_setdefault(
+ cache,
+ index_url,
+ distribution,
+ versions,
+ value,
+ ),
+ )
+
+def _vkey(versions):
+ if not versions:
+ return ""
+
+ if len(versions) == 1:
+ return versions[0]
+
+ return ",".join(sorted(versions))
+
+def _memcache_get(cache, index_url, distribution, versions):
+ if not versions:
+ return cache.get((index_url, distribution, ""))
+
+ vkey = _vkey(versions)
+ filtered = cache.get((index_url, distribution, vkey))
+ if filtered:
+ return filtered
+
+ unfiltered = cache.get((index_url, distribution, ""))
+ if not unfiltered:
+ return None
+
+ filtered = _filter_packages(unfiltered, versions, index_url, distribution)
+ cache.setdefault((index_url, distribution, vkey), filtered)
+ return filtered
+
+def _memcache_setdefault(cache, index_url, distribution, versions, value):
+ cache.setdefault((index_url, distribution, ""), value)
+ if not versions:
+ return value
+
+ filtered = _filter_packages(value, versions, index_url, distribution)
+
+ vkey = _vkey(versions)
+ cache.setdefault((index_url, distribution, vkey), filtered)
+ return filtered
+
def _filter_packages(dists, requested_versions, index_url, distribution):
if dists == None:
return None
+ if not requested_versions:
+ return dists
+
sha256s_by_version = {}
whls = {}
sdists = {}
@@ -425,7 +490,7 @@ def _filter_packages(dists, requested_versions, index_url, distribution):
sha256s_by_version = sha256s_by_version,
)
-def _facts(known_facts, facts, facts_version = _FACT_VERSION):
+def facts_cache(known_facts, facts, facts_version = _FACT_VERSION):
if known_facts == None:
return None
@@ -495,6 +560,7 @@ def _with_absolute_url(d, index_url, distribution):
index_url_for_distro = "{}/{}/".format(index_url.rstrip("/"), distribution)
# TODO @aignas 2026-02-08: think of a better way to do this
+ # TODO @aignas 2026-02-08: if the url is absolute, return d
kwargs = dict()
for attr in [
"sha256",
diff --git a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
index 0286916e8a..d29a0fb6b0 100644
--- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
+++ b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
@@ -18,265 +18,263 @@ load("@rules_testing//lib:test_suite.bzl", "test_suite")
load("@rules_testing//lib:truth.bzl", "subjects")
load(
"//python/private/pypi:simpleapi_download.bzl",
- "simpleapi_cache",
- "simpleapi_download",
- "strip_empty_path_segments",
+ "memory_cache",
) # buildifier: disable=bzl-visibility
_tests = []
-def _test_simple(env):
- calls = []
-
- def read_simpleapi(ctx, index_url, distribution, attr, cache, get_auth, requested_versions, block):
- _ = ctx # buildifier: disable=unused-variable
- _ = distribution
- _ = requested_versions
- _ = attr
- _ = cache
- _ = get_auth
- env.expect.that_bool(block).equals(False)
- url = "{}/{}/".format(index_url, distribution)
- calls.append(url)
- if "foo" in url and "main" in url:
- return struct(
- output = "",
- success = False,
- )
- else:
- return struct(
- output = "data from {}".format(url),
- success = True,
- )
-
- contents = simpleapi_download(
- ctx = struct(
- os = struct(environ = {}),
- report_progress = lambda _: None,
- ),
- attr = struct(
- index_url_overrides = {},
- index_url = "main",
- extra_index_urls = ["extra"],
- sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
- envsubst = [],
- facts = None,
- ),
- cache = {},
- parallel_download = True,
- read_simpleapi = read_simpleapi,
- )
-
- env.expect.that_collection(calls).contains_exactly([
- "extra/foo/",
- "main/bar/",
- "main/baz/",
- "main/foo/",
- ])
- env.expect.that_dict(contents).contains_exactly({
- "bar": "data from main/bar/",
- "baz": "data from main/baz/",
- "foo": "data from extra/foo/",
- })
-
-_tests.append(_test_simple)
-
-def _test_fail(env):
- calls = []
- fails = []
-
- def read_simpleapi(ctx, index_url, distribution, attr, cache, get_auth, requested_versions, block):
- _ = ctx # buildifier: disable=unused-variable
- _ = distribution
- _ = requested_versions
- _ = attr
- _ = cache
- _ = get_auth
- url = "{}/{}/".format(index_url, distribution)
- env.expect.that_bool(block).equals(False)
- calls.append(url)
- if "foo" in url:
- return struct(
- output = "",
- success = False,
- )
- if "bar" in url:
- return struct(
- output = "",
- success = False,
- )
- else:
- return struct(
- output = "data from {}".format(url),
- success = True,
- )
-
- simpleapi_download(
- ctx = struct(
- os = struct(environ = {}),
- report_progress = lambda _: None,
- ),
- attr = struct(
- index_url_overrides = {
- "foo": "invalid",
- },
- index_url = "main",
- extra_index_urls = ["extra"],
- sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
- envsubst = [],
- facts = None,
- ),
- cache = {},
- parallel_download = True,
- read_simpleapi = read_simpleapi,
- _fail = fails.append,
- )
-
- env.expect.that_collection(fails).contains_exactly([
- """
-Failed to download metadata of the following packages from urls:
-{
- "foo": "invalid",
- "bar": ["main", "extra"],
-}
-
-If you would like to skip downloading metadata for these packages please add 'simpleapi_skip=[
- "foo",
- "bar",
-]' to your 'pip.parse' call.
-""",
- ])
- env.expect.that_collection(calls).contains_exactly([
- "invalid/foo/",
- "main/bar/",
- "main/baz/",
- "invalid/foo/",
- "extra/bar/",
- ])
-
-_tests.append(_test_fail)
-
-def _test_download_url(env):
- downloads = {}
-
- def download(url, output, **kwargs):
- _ = kwargs # buildifier: disable=unused-variable
- downloads[url[0]] = output
- return struct(success = True)
-
- simpleapi_download(
- ctx = struct(
- os = struct(environ = {}),
- download = download,
- report_progress = lambda _: None,
- read = lambda i: "contents of " + i,
- path = lambda i: "path/for/" + i,
- ),
- attr = struct(
- index_url_overrides = {},
- index_url = "https://example.com/main/simple/",
- extra_index_urls = [],
- sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
- envsubst = [],
- facts = None,
- ),
- cache = {},
- parallel_download = False,
- get_auth = lambda ctx, urls, ctx_attr: struct(),
- )
-
- env.expect.that_dict(downloads).contains_exactly({
- "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
- "https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
- "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
- })
-
-_tests.append(_test_download_url)
-
-def _test_download_url_parallel(env):
- downloads = {}
-
- def download(url, output, **kwargs):
- _ = kwargs # buildifier: disable=unused-variable
- downloads[url[0]] = output
- return struct(wait = lambda: struct(success = True))
-
- simpleapi_download(
- ctx = struct(
- os = struct(environ = {}),
- download = download,
- report_progress = lambda _: None,
- read = lambda i: "contents of " + i,
- path = lambda i: "path/for/" + i,
- ),
- attr = struct(
- index_url_overrides = {},
- index_url = "https://example.com/main/simple/",
- extra_index_urls = [],
- sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
- envsubst = [],
- facts = None,
- ),
- cache = {},
- parallel_download = True,
- get_auth = lambda ctx, urls, ctx_attr: struct(),
- )
-
- env.expect.that_dict(downloads).contains_exactly({
- "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
- "https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
- "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
- })
-
-_tests.append(_test_download_url_parallel)
-
-def _test_download_envsubst_url(env):
- downloads = {}
-
- def download(url, output, **kwargs):
- _ = kwargs # buildifier: disable=unused-variable
- downloads[url[0]] = output
- return struct(success = True)
-
- simpleapi_download(
- ctx = struct(
- os = struct(environ = {"INDEX_URL": "https://example.com/main/simple/"}),
- download = download,
- report_progress = lambda _: None,
- read = lambda i: "contents of " + i,
- path = lambda i: "path/for/" + i,
- ),
- attr = struct(
- index_url_overrides = {},
- index_url = "$INDEX_URL",
- extra_index_urls = [],
- sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
- envsubst = ["INDEX_URL"],
- facts = None,
- ),
- cache = {},
- parallel_download = False,
- get_auth = lambda ctx, urls, ctx_attr: struct(),
- )
-
- env.expect.that_dict(downloads).contains_exactly({
- "https://example.com/main/simple/bar/": "path/for/~index_url~_bar.html",
- "https://example.com/main/simple/baz/": "path/for/~index_url~_baz.html",
- "https://example.com/main/simple/foo/": "path/for/~index_url~_foo.html",
- })
-
-_tests.append(_test_download_envsubst_url)
-
-def _test_strip_empty_path_segments(env):
- env.expect.that_str(strip_empty_path_segments("no/scheme//is/unchanged")).equals("no/scheme//is/unchanged")
- env.expect.that_str(strip_empty_path_segments("scheme://with/no/empty/segments")).equals("scheme://with/no/empty/segments")
- env.expect.that_str(strip_empty_path_segments("scheme://with//empty/segments")).equals("scheme://with/empty/segments")
- env.expect.that_str(strip_empty_path_segments("scheme://with///multiple//empty/segments")).equals("scheme://with/multiple/empty/segments")
- env.expect.that_str(strip_empty_path_segments("scheme://with//trailing/slash/")).equals("scheme://with/trailing/slash/")
- env.expect.that_str(strip_empty_path_segments("scheme://with/trailing/slashes///")).equals("scheme://with/trailing/slashes/")
-
-_tests.append(_test_strip_empty_path_segments)
-
+# def _test_simple(env):
+# calls = []
+#
+# def read_simpleapi(ctx, index_url, distribution, attr, cache, get_auth, requested_versions, block):
+# _ = ctx # buildifier: disable=unused-variable
+# _ = distribution
+# _ = requested_versions
+# _ = attr
+# _ = cache
+# _ = get_auth
+# env.expect.that_bool(block).equals(False)
+# url = "{}/{}/".format(index_url, distribution)
+# calls.append(url)
+# if "foo" in url and "main" in url:
+# return struct(
+# output = "",
+# success = False,
+# )
+# else:
+# return struct(
+# output = "data from {}".format(url),
+# success = True,
+# )
+#
+# contents = simpleapi_download(
+# ctx = struct(
+# os = struct(environ = {}),
+# report_progress = lambda _: None,
+# ),
+# attr = struct(
+# index_url_overrides = {},
+# index_url = "main",
+# extra_index_urls = ["extra"],
+# sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
+# envsubst = [],
+# facts = None,
+# ),
+# cache = {},
+# parallel_download = True,
+# read_simpleapi = read_simpleapi,
+# )
+#
+# env.expect.that_collection(calls).contains_exactly([
+# "extra/foo/",
+# "main/bar/",
+# "main/baz/",
+# "main/foo/",
+# ])
+# env.expect.that_dict(contents).contains_exactly({
+# "bar": "data from main/bar/",
+# "baz": "data from main/baz/",
+# "foo": "data from extra/foo/",
+# })
+#
+# _tests.append(_test_simple)
+#
+# def _test_fail(env):
+# calls = []
+# fails = []
+#
+# def read_simpleapi(ctx, index_url, distribution, attr, cache, get_auth, requested_versions, block):
+# _ = ctx # buildifier: disable=unused-variable
+# _ = distribution
+# _ = requested_versions
+# _ = attr
+# _ = cache
+# _ = get_auth
+# url = "{}/{}/".format(index_url, distribution)
+# env.expect.that_bool(block).equals(False)
+# calls.append(url)
+# if "foo" in url:
+# return struct(
+# output = "",
+# success = False,
+# )
+# if "bar" in url:
+# return struct(
+# output = "",
+# success = False,
+# )
+# else:
+# return struct(
+# output = "data from {}".format(url),
+# success = True,
+# )
+#
+# simpleapi_download(
+# ctx = struct(
+# os = struct(environ = {}),
+# report_progress = lambda _: None,
+# ),
+# attr = struct(
+# index_url_overrides = {
+# "foo": "invalid",
+# },
+# index_url = "main",
+# extra_index_urls = ["extra"],
+# sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
+# envsubst = [],
+# facts = None,
+# ),
+# cache = {},
+# parallel_download = True,
+# read_simpleapi = read_simpleapi,
+# _fail = fails.append,
+# )
+#
+# env.expect.that_collection(fails).contains_exactly([
+# """
+# Failed to download metadata of the following packages from urls:
+# {
+# "foo": "invalid",
+# "bar": ["main", "extra"],
+# }
+#
+# If you would like to skip downloading metadata for these packages please add 'simpleapi_skip=[
+# "foo",
+# "bar",
+# ]' to your 'pip.parse' call.
+# """,
+# ])
+# env.expect.that_collection(calls).contains_exactly([
+# "invalid/foo/",
+# "main/bar/",
+# "main/baz/",
+# "invalid/foo/",
+# "extra/bar/",
+# ])
+#
+# _tests.append(_test_fail)
+#
+# def _test_download_url(env):
+# downloads = {}
+#
+# def download(url, output, **kwargs):
+# _ = kwargs # buildifier: disable=unused-variable
+# downloads[url[0]] = output
+# return struct(success = True)
+#
+# simpleapi_download(
+# ctx = struct(
+# os = struct(environ = {}),
+# download = download,
+# report_progress = lambda _: None,
+# read = lambda i: "contents of " + i,
+# path = lambda i: "path/for/" + i,
+# ),
+# attr = struct(
+# index_url_overrides = {},
+# index_url = "https://example.com/main/simple/",
+# extra_index_urls = [],
+# sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
+# envsubst = [],
+# facts = None,
+# ),
+# cache = {},
+# parallel_download = False,
+# get_auth = lambda ctx, urls, ctx_attr: struct(),
+# )
+#
+# env.expect.that_dict(downloads).contains_exactly({
+# "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
+# "https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
+# "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
+# })
+#
+# _tests.append(_test_download_url)
+#
+# def _test_download_url_parallel(env):
+# downloads = {}
+#
+# def download(url, output, **kwargs):
+# _ = kwargs # buildifier: disable=unused-variable
+# downloads[url[0]] = output
+# return struct(wait = lambda: struct(success = True))
+#
+# simpleapi_download(
+# ctx = struct(
+# os = struct(environ = {}),
+# download = download,
+# report_progress = lambda _: None,
+# read = lambda i: "contents of " + i,
+# path = lambda i: "path/for/" + i,
+# ),
+# attr = struct(
+# index_url_overrides = {},
+# index_url = "https://example.com/main/simple/",
+# extra_index_urls = [],
+# sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
+# envsubst = [],
+# facts = None,
+# ),
+# cache = {},
+# parallel_download = True,
+# get_auth = lambda ctx, urls, ctx_attr: struct(),
+# )
+#
+# env.expect.that_dict(downloads).contains_exactly({
+# "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
+# "https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
+# "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
+# })
+#
+# _tests.append(_test_download_url_parallel)
+#
+# def _test_download_envsubst_url(env):
+# downloads = {}
+#
+# def download(url, output, **kwargs):
+# _ = kwargs # buildifier: disable=unused-variable
+# downloads[url[0]] = output
+# return struct(success = True)
+#
+# simpleapi_download(
+# ctx = struct(
+# os = struct(environ = {"INDEX_URL": "https://example.com/main/simple/"}),
+# download = download,
+# report_progress = lambda _: None,
+# read = lambda i: "contents of " + i,
+# path = lambda i: "path/for/" + i,
+# ),
+# attr = struct(
+# index_url_overrides = {},
+# index_url = "$INDEX_URL",
+# extra_index_urls = [],
+# sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
+# envsubst = ["INDEX_URL"],
+# facts = None,
+# ),
+# cache = {},
+# parallel_download = False,
+# get_auth = lambda ctx, urls, ctx_attr: struct(),
+# )
+#
+# env.expect.that_dict(downloads).contains_exactly({
+# "https://example.com/main/simple/bar/": "path/for/~index_url~_bar.html",
+# "https://example.com/main/simple/baz/": "path/for/~index_url~_baz.html",
+# "https://example.com/main/simple/foo/": "path/for/~index_url~_foo.html",
+# })
+#
+# _tests.append(_test_download_envsubst_url)
+#
+# def _test_strip_empty_path_segments(env):
+# env.expect.that_str(strip_empty_path_segments("no/scheme//is/unchanged")).equals("no/scheme//is/unchanged")
+# env.expect.that_str(strip_empty_path_segments("scheme://with/no/empty/segments")).equals("scheme://with/no/empty/segments")
+# env.expect.that_str(strip_empty_path_segments("scheme://with//empty/segments")).equals("scheme://with/empty/segments")
+# env.expect.that_str(strip_empty_path_segments("scheme://with///multiple//empty/segments")).equals("scheme://with/multiple/empty/segments")
+# env.expect.that_str(strip_empty_path_segments("scheme://with//trailing/slash/")).equals("scheme://with/trailing/slash/")
+# env.expect.that_str(strip_empty_path_segments("scheme://with/trailing/slashes///")).equals("scheme://with/trailing/slashes/")
+#
+# _tests.append(_test_strip_empty_path_segments)
+#
def _expect_cache_result(env, cache, key, sdists, whls):
got = env.expect.that_struct(
cache.get(*key),
@@ -288,149 +286,56 @@ def _expect_cache_result(env, cache, key, sdists, whls):
got.whls().contains_exactly(whls)
got.sdists().contains_exactly(sdists)
-def _test_cache_without_facts(env):
- cache = simpleapi_cache()
-
- env.expect.that_str(cache.get("index_url", "distro", ["1.0"])).equals(None)
- cache.setdefault("index_url", "distro", ["1.0"], struct(
+def _test_memory_cache(env):
+ memory = {}
+ cache = memory_cache(memory)
+ all_packages = struct(
sdists = {
- "a": struct(version = "1.0"),
+ "aa": struct(version = "1.0"),
+ "ab": struct(version = "1.1"),
},
whls = {
- "b": struct(version = "1.0"),
- "c": struct(version = "1.1"),
- "d": struct(version = "1.1"),
- "e": struct(version = "1.0"),
+ "ba": struct(version = "1.0"),
+ "bb": struct(version = "1.1"),
},
- ))
- _expect_cache_result(
- env,
- cache,
- key = ("index_url", "distro", ["1.0"]),
- sdists = {"a": struct(version = "1.0")},
- whls = {"b": struct(version = "1.0"), "e": struct(version = "1.0")},
- )
- env.expect.that_str(cache.get("index_url", "distro", ["1.2"])).equals(None)
-
- _expect_cache_result(
- env,
- cache,
- key = ("index_url", "distro", ["1.1"]),
- sdists = {},
- whls = {"c": struct(version = "1.1"), "d": struct(version = "1.1")},
- )
-
-_tests.append(_test_cache_without_facts)
-
-def _test_cache_with_facts(env):
- facts = {}
- cache_storage = {}
- known_facts = {}
- cache = simpleapi_cache(
- cache = cache_storage,
- known_facts = known_facts,
- facts = facts,
- )
-
- sdists = struct(
- a = struct(version = "1.0", url = "https://index_url/distro/a-1.0.tgz", filename = "a-1.0.tgz", yanked = False, sha256 = "a"),
)
-
- # TODO @aignas 2026-02-08: url is relative
- # TODO @aignas 2026-02-08: url starts with `/`
- # TODO @aignas 2026-02-08: url is absolute
- whls = struct(
- b = struct(version = "1.0", url = "https://index_url/distro/b-1.0-tail.whl", filename = "b-1.0-tail.whl", yanked = False, sha256 = "b"),
- c = struct(version = "1.1", url = "https://index_url/distro/c-1.1-tail.whl", filename = "c-1.1-tail.whl", yanked = False, sha256 = "c"),
- d = struct(version = "1.1", url = "https://index_url/distro/d-1.1-tail.whl", filename = "d-1.1-tail.whl", yanked = False, sha256 = "d"),
- e = struct(version = "1.0", url = "https://index_url/distro/e-1.0-tail.whl", filename = "e-1.0-tail.whl", yanked = False, sha256 = "e"),
- )
-
- env.expect.that_str(cache.get("index_url", "distro", ["1.0"])).equals(None)
- cache.setdefault("index_url", "distro", ["1.0"], struct(
- sdists = {
- "a": sdists.a,
- },
- whls = {
- "b": whls.b,
- "c": whls.c,
- "d": whls.d,
- "e": whls.e,
- },
- ))
+ cache.setdefault("index", "distro", None, all_packages)
+ env.expect.that_dict(memory).contains_exactly({
+ ("index", "distro", ""): all_packages,
+ })
_expect_cache_result(
env,
cache,
- key = ("index_url", "distro", ["1.0"]),
+ ("index", "distro", ["1.0"]),
sdists = {
- "a": sdists.a,
+ "aa": struct(version = "1.0"),
},
whls = {
- "b": whls.b,
- "e": whls.e,
+ "ba": struct(version = "1.0"),
},
)
- env.expect.that_str(cache.get("index_url", "distro", ["1.2"])).equals(None)
- env.expect.that_dict(facts).contains_exactly({
- "fact_version": "v1",
- "index_url": {
- "dist_hashes": {
- sdists.a.url: "a",
- whls.b.url: "b",
- whls.e.url: "e",
- },
- },
- })
-
+ env.expect.that_dict(memory).keys().contains_exactly([
+ ("index", "distro", ""),
+ ("index", "distro", "1.0"),
+ ])
_expect_cache_result(
env,
cache,
- key = ("index_url", "distro", ["1.1"]),
- sdists = {},
- whls = {
- "c": whls.c,
- "d": whls.d,
- },
- )
- env.expect.that_dict(facts).contains_exactly({
- "fact_version": "v1",
- "index_url": {
- "dist_hashes": {
- sdists.a.url: "a",
- whls.b.url: "b",
- whls.c.url: "c",
- whls.d.url: "d",
- whls.e.url: "e",
- },
+ ("index", "distro", ["1.1"]),
+ sdists = {
+ "ab": struct(version = "1.1"),
},
- })
-
- # simulate write to MODULE.bazel.lock
- known_facts.update(facts)
- facts.clear()
- cache_storage.clear()
-
- _expect_cache_result(
- env,
- cache,
- key = ("index_url", "distro", ["1.1"]),
- sdists = {},
whls = {
- "c": whls.c,
- "d": whls.d,
+ "bb": struct(version = "1.1"),
},
)
- env.expect.that_dict(facts).contains_exactly({
- "fact_version": "v1",
- "index_url": {
- "dist_hashes": {
- whls.c.url: "c",
- whls.d.url: "d",
- },
- },
- })
+ env.expect.that_dict(memory).keys().contains_exactly([
+ ("index", "distro", ""),
+ ("index", "distro", "1.0"),
+ ("index", "distro", "1.1"),
+ ])
-_tests.append(_test_cache_with_facts)
+_tests.append(_test_memory_cache)
def simpleapi_download_test_suite(name):
"""Create the test suite.
From 96ccbc693ac37a2f0f71d299df2f92b3bbf5970c Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Mon, 9 Feb 2026 00:05:36 +0900
Subject: [PATCH 12/23] wip
---
python/private/pypi/simpleapi_download.bzl | 16 +-
.../simpleapi_download_tests.bzl | 506 +++++++++---------
2 files changed, 264 insertions(+), 258 deletions(-)
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index 25ff73469b..633b2b64dc 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -382,15 +382,16 @@ def _cache_get(cache, facts, index_url, distribution, versions):
# Ensure that we write back to the facts, this happens if we request versions that
# we don't have facts for but we have in-memory cache of SimpleAPI query results
- facts.setdefault(index_url, distribution, cached)
+ facts.setdefault(index_url, distribution, versions, cached)
return cached
def _cache_setdefault(cache, facts, index_url, distribution, versions, value):
- cache.setdefault((index_url, distribution), value)
- value = _filter_packages(value, versions, index_url, distribution)
+ filtered = cache.setdefault(index_url, distribution, versions, value)
- if facts:
- facts.setdefault(index_url, distribution, value)
+ if facts and versions:
+ facts.setdefault(index_url, distribution, versions, filtered)
+
+ return filtered
def memory_cache(cache = None):
"""SimpleAPI cache for making fewer calls.
@@ -425,7 +426,10 @@ def _vkey(versions):
return ""
if len(versions) == 1:
- return versions[0]
+ if type(versions) == "dict":
+ return versions.keys()[0]
+ else:
+ return versions[0]
return ",".join(sorted(versions))
diff --git a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
index d29a0fb6b0..2a0b1f8811 100644
--- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
+++ b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
@@ -19,262 +19,264 @@ load("@rules_testing//lib:truth.bzl", "subjects")
load(
"//python/private/pypi:simpleapi_download.bzl",
"memory_cache",
+ "simpleapi_download",
+ "strip_empty_path_segments",
) # buildifier: disable=bzl-visibility
_tests = []
-# def _test_simple(env):
-# calls = []
-#
-# def read_simpleapi(ctx, index_url, distribution, attr, cache, get_auth, requested_versions, block):
-# _ = ctx # buildifier: disable=unused-variable
-# _ = distribution
-# _ = requested_versions
-# _ = attr
-# _ = cache
-# _ = get_auth
-# env.expect.that_bool(block).equals(False)
-# url = "{}/{}/".format(index_url, distribution)
-# calls.append(url)
-# if "foo" in url and "main" in url:
-# return struct(
-# output = "",
-# success = False,
-# )
-# else:
-# return struct(
-# output = "data from {}".format(url),
-# success = True,
-# )
-#
-# contents = simpleapi_download(
-# ctx = struct(
-# os = struct(environ = {}),
-# report_progress = lambda _: None,
-# ),
-# attr = struct(
-# index_url_overrides = {},
-# index_url = "main",
-# extra_index_urls = ["extra"],
-# sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
-# envsubst = [],
-# facts = None,
-# ),
-# cache = {},
-# parallel_download = True,
-# read_simpleapi = read_simpleapi,
-# )
-#
-# env.expect.that_collection(calls).contains_exactly([
-# "extra/foo/",
-# "main/bar/",
-# "main/baz/",
-# "main/foo/",
-# ])
-# env.expect.that_dict(contents).contains_exactly({
-# "bar": "data from main/bar/",
-# "baz": "data from main/baz/",
-# "foo": "data from extra/foo/",
-# })
-#
-# _tests.append(_test_simple)
-#
-# def _test_fail(env):
-# calls = []
-# fails = []
-#
-# def read_simpleapi(ctx, index_url, distribution, attr, cache, get_auth, requested_versions, block):
-# _ = ctx # buildifier: disable=unused-variable
-# _ = distribution
-# _ = requested_versions
-# _ = attr
-# _ = cache
-# _ = get_auth
-# url = "{}/{}/".format(index_url, distribution)
-# env.expect.that_bool(block).equals(False)
-# calls.append(url)
-# if "foo" in url:
-# return struct(
-# output = "",
-# success = False,
-# )
-# if "bar" in url:
-# return struct(
-# output = "",
-# success = False,
-# )
-# else:
-# return struct(
-# output = "data from {}".format(url),
-# success = True,
-# )
-#
-# simpleapi_download(
-# ctx = struct(
-# os = struct(environ = {}),
-# report_progress = lambda _: None,
-# ),
-# attr = struct(
-# index_url_overrides = {
-# "foo": "invalid",
-# },
-# index_url = "main",
-# extra_index_urls = ["extra"],
-# sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
-# envsubst = [],
-# facts = None,
-# ),
-# cache = {},
-# parallel_download = True,
-# read_simpleapi = read_simpleapi,
-# _fail = fails.append,
-# )
-#
-# env.expect.that_collection(fails).contains_exactly([
-# """
-# Failed to download metadata of the following packages from urls:
-# {
-# "foo": "invalid",
-# "bar": ["main", "extra"],
-# }
-#
-# If you would like to skip downloading metadata for these packages please add 'simpleapi_skip=[
-# "foo",
-# "bar",
-# ]' to your 'pip.parse' call.
-# """,
-# ])
-# env.expect.that_collection(calls).contains_exactly([
-# "invalid/foo/",
-# "main/bar/",
-# "main/baz/",
-# "invalid/foo/",
-# "extra/bar/",
-# ])
-#
-# _tests.append(_test_fail)
-#
-# def _test_download_url(env):
-# downloads = {}
-#
-# def download(url, output, **kwargs):
-# _ = kwargs # buildifier: disable=unused-variable
-# downloads[url[0]] = output
-# return struct(success = True)
-#
-# simpleapi_download(
-# ctx = struct(
-# os = struct(environ = {}),
-# download = download,
-# report_progress = lambda _: None,
-# read = lambda i: "contents of " + i,
-# path = lambda i: "path/for/" + i,
-# ),
-# attr = struct(
-# index_url_overrides = {},
-# index_url = "https://example.com/main/simple/",
-# extra_index_urls = [],
-# sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
-# envsubst = [],
-# facts = None,
-# ),
-# cache = {},
-# parallel_download = False,
-# get_auth = lambda ctx, urls, ctx_attr: struct(),
-# )
-#
-# env.expect.that_dict(downloads).contains_exactly({
-# "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
-# "https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
-# "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
-# })
-#
-# _tests.append(_test_download_url)
-#
-# def _test_download_url_parallel(env):
-# downloads = {}
-#
-# def download(url, output, **kwargs):
-# _ = kwargs # buildifier: disable=unused-variable
-# downloads[url[0]] = output
-# return struct(wait = lambda: struct(success = True))
-#
-# simpleapi_download(
-# ctx = struct(
-# os = struct(environ = {}),
-# download = download,
-# report_progress = lambda _: None,
-# read = lambda i: "contents of " + i,
-# path = lambda i: "path/for/" + i,
-# ),
-# attr = struct(
-# index_url_overrides = {},
-# index_url = "https://example.com/main/simple/",
-# extra_index_urls = [],
-# sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
-# envsubst = [],
-# facts = None,
-# ),
-# cache = {},
-# parallel_download = True,
-# get_auth = lambda ctx, urls, ctx_attr: struct(),
-# )
-#
-# env.expect.that_dict(downloads).contains_exactly({
-# "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
-# "https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
-# "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
-# })
-#
-# _tests.append(_test_download_url_parallel)
-#
-# def _test_download_envsubst_url(env):
-# downloads = {}
-#
-# def download(url, output, **kwargs):
-# _ = kwargs # buildifier: disable=unused-variable
-# downloads[url[0]] = output
-# return struct(success = True)
-#
-# simpleapi_download(
-# ctx = struct(
-# os = struct(environ = {"INDEX_URL": "https://example.com/main/simple/"}),
-# download = download,
-# report_progress = lambda _: None,
-# read = lambda i: "contents of " + i,
-# path = lambda i: "path/for/" + i,
-# ),
-# attr = struct(
-# index_url_overrides = {},
-# index_url = "$INDEX_URL",
-# extra_index_urls = [],
-# sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
-# envsubst = ["INDEX_URL"],
-# facts = None,
-# ),
-# cache = {},
-# parallel_download = False,
-# get_auth = lambda ctx, urls, ctx_attr: struct(),
-# )
-#
-# env.expect.that_dict(downloads).contains_exactly({
-# "https://example.com/main/simple/bar/": "path/for/~index_url~_bar.html",
-# "https://example.com/main/simple/baz/": "path/for/~index_url~_baz.html",
-# "https://example.com/main/simple/foo/": "path/for/~index_url~_foo.html",
-# })
-#
-# _tests.append(_test_download_envsubst_url)
-#
-# def _test_strip_empty_path_segments(env):
-# env.expect.that_str(strip_empty_path_segments("no/scheme//is/unchanged")).equals("no/scheme//is/unchanged")
-# env.expect.that_str(strip_empty_path_segments("scheme://with/no/empty/segments")).equals("scheme://with/no/empty/segments")
-# env.expect.that_str(strip_empty_path_segments("scheme://with//empty/segments")).equals("scheme://with/empty/segments")
-# env.expect.that_str(strip_empty_path_segments("scheme://with///multiple//empty/segments")).equals("scheme://with/multiple/empty/segments")
-# env.expect.that_str(strip_empty_path_segments("scheme://with//trailing/slash/")).equals("scheme://with/trailing/slash/")
-# env.expect.that_str(strip_empty_path_segments("scheme://with/trailing/slashes///")).equals("scheme://with/trailing/slashes/")
-#
-# _tests.append(_test_strip_empty_path_segments)
-#
+def _test_simple(env):
+ calls = []
+
+ def read_simpleapi(ctx, index_url, distribution, attr, cache, get_auth, requested_versions, block):
+ _ = ctx # buildifier: disable=unused-variable
+ _ = distribution
+ _ = requested_versions
+ _ = attr
+ _ = cache
+ _ = get_auth
+ env.expect.that_bool(block).equals(False)
+ url = "{}/{}/".format(index_url, distribution)
+ calls.append(url)
+ if "foo" in url and "main" in url:
+ return struct(
+ output = "",
+ success = False,
+ )
+ else:
+ return struct(
+ output = "data from {}".format(url),
+ success = True,
+ )
+
+ contents = simpleapi_download(
+ ctx = struct(
+ os = struct(environ = {}),
+ report_progress = lambda _: None,
+ ),
+ attr = struct(
+ index_url_overrides = {},
+ index_url = "main",
+ extra_index_urls = ["extra"],
+ sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
+ envsubst = [],
+ facts = None,
+ ),
+ cache = {},
+ parallel_download = True,
+ read_simpleapi = read_simpleapi,
+ )
+
+ env.expect.that_collection(calls).contains_exactly([
+ "extra/foo/",
+ "main/bar/",
+ "main/baz/",
+ "main/foo/",
+ ])
+ env.expect.that_dict(contents).contains_exactly({
+ "bar": "data from main/bar/",
+ "baz": "data from main/baz/",
+ "foo": "data from extra/foo/",
+ })
+
+_tests.append(_test_simple)
+
+def _test_fail(env):
+ calls = []
+ fails = []
+
+ def read_simpleapi(ctx, index_url, distribution, attr, cache, get_auth, requested_versions, block):
+ _ = ctx # buildifier: disable=unused-variable
+ _ = distribution
+ _ = requested_versions
+ _ = attr
+ _ = cache
+ _ = get_auth
+ url = "{}/{}/".format(index_url, distribution)
+ env.expect.that_bool(block).equals(False)
+ calls.append(url)
+ if "foo" in url:
+ return struct(
+ output = "",
+ success = False,
+ )
+ if "bar" in url:
+ return struct(
+ output = "",
+ success = False,
+ )
+ else:
+ return struct(
+ output = "data from {}".format(url),
+ success = True,
+ )
+
+ simpleapi_download(
+ ctx = struct(
+ os = struct(environ = {}),
+ report_progress = lambda _: None,
+ ),
+ attr = struct(
+ index_url_overrides = {
+ "foo": "invalid",
+ },
+ index_url = "main",
+ extra_index_urls = ["extra"],
+ sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
+ envsubst = [],
+ facts = None,
+ ),
+ cache = {},
+ parallel_download = True,
+ read_simpleapi = read_simpleapi,
+ _fail = fails.append,
+ )
+
+ env.expect.that_collection(fails).contains_exactly([
+ """
+Failed to download metadata of the following packages from urls:
+{
+ "bar": ["main", "extra"],
+ "foo": "invalid",
+}
+
+If you would like to skip downloading metadata for these packages please add 'simpleapi_skip=[
+ "bar",
+ "foo",
+]' to your 'pip.parse' call.
+""",
+ ])
+ env.expect.that_collection(calls).contains_exactly([
+ "invalid/foo/",
+ "main/bar/",
+ "main/baz/",
+ "invalid/foo/",
+ "extra/bar/",
+ ])
+
+_tests.append(_test_fail)
+
+def _test_download_url(env):
+ downloads = {}
+
+ def download(url, output, **kwargs):
+ _ = kwargs # buildifier: disable=unused-variable
+ downloads[url[0]] = output
+ return struct(success = True)
+
+ simpleapi_download(
+ ctx = struct(
+ os = struct(environ = {}),
+ download = download,
+ report_progress = lambda _: None,
+ read = lambda i: "contents of " + i,
+ path = lambda i: "path/for/" + i,
+ ),
+ attr = struct(
+ index_url_overrides = {},
+ index_url = "https://example.com/main/simple/",
+ extra_index_urls = [],
+ sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
+ envsubst = [],
+ facts = None,
+ ),
+ cache = {},
+ parallel_download = False,
+ get_auth = lambda ctx, urls, ctx_attr: struct(),
+ )
+
+ env.expect.that_dict(downloads).contains_exactly({
+ "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
+ "https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
+ "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
+ })
+
+_tests.append(_test_download_url)
+
+def _test_download_url_parallel(env):
+ downloads = {}
+
+ def download(url, output, **kwargs):
+ _ = kwargs # buildifier: disable=unused-variable
+ downloads[url[0]] = output
+ return struct(wait = lambda: struct(success = True))
+
+ simpleapi_download(
+ ctx = struct(
+ os = struct(environ = {}),
+ download = download,
+ report_progress = lambda _: None,
+ read = lambda i: "contents of " + i,
+ path = lambda i: "path/for/" + i,
+ ),
+ attr = struct(
+ index_url_overrides = {},
+ index_url = "https://example.com/main/simple/",
+ extra_index_urls = [],
+ sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
+ envsubst = [],
+ facts = None,
+ ),
+ cache = {},
+ parallel_download = True,
+ get_auth = lambda ctx, urls, ctx_attr: struct(),
+ )
+
+ env.expect.that_dict(downloads).contains_exactly({
+ "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
+ "https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
+ "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
+ })
+
+_tests.append(_test_download_url_parallel)
+
+def _test_download_envsubst_url(env):
+ downloads = {}
+
+ def download(url, output, **kwargs):
+ _ = kwargs # buildifier: disable=unused-variable
+ downloads[url[0]] = output
+ return struct(success = True)
+
+ simpleapi_download(
+ ctx = struct(
+ os = struct(environ = {"INDEX_URL": "https://example.com/main/simple/"}),
+ download = download,
+ report_progress = lambda _: None,
+ read = lambda i: "contents of " + i,
+ path = lambda i: "path/for/" + i,
+ ),
+ attr = struct(
+ index_url_overrides = {},
+ index_url = "$INDEX_URL",
+ extra_index_urls = [],
+ sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
+ envsubst = ["INDEX_URL"],
+ facts = None,
+ ),
+ cache = {},
+ parallel_download = False,
+ get_auth = lambda ctx, urls, ctx_attr: struct(),
+ )
+
+ env.expect.that_dict(downloads).contains_exactly({
+ "https://example.com/main/simple/bar/": "path/for/~index_url~_bar.html",
+ "https://example.com/main/simple/baz/": "path/for/~index_url~_baz.html",
+ "https://example.com/main/simple/foo/": "path/for/~index_url~_foo.html",
+ })
+
+_tests.append(_test_download_envsubst_url)
+
+def _test_strip_empty_path_segments(env):
+ env.expect.that_str(strip_empty_path_segments("no/scheme//is/unchanged")).equals("no/scheme//is/unchanged")
+ env.expect.that_str(strip_empty_path_segments("scheme://with/no/empty/segments")).equals("scheme://with/no/empty/segments")
+ env.expect.that_str(strip_empty_path_segments("scheme://with//empty/segments")).equals("scheme://with/empty/segments")
+ env.expect.that_str(strip_empty_path_segments("scheme://with///multiple//empty/segments")).equals("scheme://with/multiple/empty/segments")
+ env.expect.that_str(strip_empty_path_segments("scheme://with//trailing/slash/")).equals("scheme://with/trailing/slash/")
+ env.expect.that_str(strip_empty_path_segments("scheme://with/trailing/slashes///")).equals("scheme://with/trailing/slashes/")
+
+_tests.append(_test_strip_empty_path_segments)
+
def _expect_cache_result(env, cache, key, sdists, whls):
got = env.expect.that_struct(
cache.get(*key),
From 4c9f7298cd2d30d313cd5aaf6742c221f03bcbfe Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Mon, 9 Feb 2026 00:26:09 +0900
Subject: [PATCH 13/23] wip
---
python/private/pypi/simpleapi_download.bzl | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index 633b2b64dc..b1481f8981 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -333,7 +333,11 @@ def _read_index_result(*, result, index_url, distribution, real_url, cache, requ
return struct(success = False)
# TODO @aignas 2026-02-08: make this the only behaviour, maybe can get rid of `real_url
- output = parse_simpleapi_html(url = real_url, content = result.output, return_absolute = False)
+ output = parse_simpleapi_html(
+ url = real_url,
+ content = result.output,
+ return_absolute = False,
+ )
if not output:
return struct(success = False)
@@ -372,9 +376,10 @@ def _cache_get(cache, facts, index_url, distribution, versions):
if not facts:
return cache.get(index_url, distribution, versions)
- cached = facts.get(index_url, distribution, versions)
- if cached:
- return cached
+ if versions:
+ cached = facts.get(index_url, distribution, versions)
+ if cached:
+ return cached
cached = cache.get(index_url, distribution, versions)
if not cached:
@@ -382,14 +387,15 @@ def _cache_get(cache, facts, index_url, distribution, versions):
# Ensure that we write back to the facts, this happens if we request versions that
# we don't have facts for but we have in-memory cache of SimpleAPI query results
- facts.setdefault(index_url, distribution, versions, cached)
+ if versions:
+ facts.setdefault(index_url, distribution, cached)
return cached
def _cache_setdefault(cache, facts, index_url, distribution, versions, value):
filtered = cache.setdefault(index_url, distribution, versions, value)
if facts and versions:
- facts.setdefault(index_url, distribution, versions, filtered)
+ facts.setdefault(index_url, distribution, filtered)
return filtered
From bccef60eccbfbe4d0ff433ed84fc41c51782fc85 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Wed, 11 Feb 2026 11:47:10 +0900
Subject: [PATCH 14/23] wip
---
python/private/pypi/simpleapi_download.bzl | 23 +++++++++++-----------
1 file changed, 12 insertions(+), 11 deletions(-)
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index b1481f8981..7af5ce21c2 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -376,19 +376,18 @@ def _cache_get(cache, facts, index_url, distribution, versions):
if not facts:
return cache.get(index_url, distribution, versions)
- if versions:
+ cached = cache.get(index_url, distribution, versions)
+ if not cached and versions:
+ # Could not get from in-memory, read from lockfile facts
cached = facts.get(index_url, distribution, versions)
- if cached:
- return cached
- cached = cache.get(index_url, distribution, versions)
- if not cached:
- return None
+ # and write to in-memory if we need to access the same info in the future
+ cache.setdefault(index_url, distribution, versions, cached)
- # Ensure that we write back to the facts, this happens if we request versions that
- # we don't have facts for but we have in-memory cache of SimpleAPI query results
- if versions:
+ if versions and cached:
+ # Write to facts so that all *used* data is in the lock file
facts.setdefault(index_url, distribution, cached)
+
return cached
def _cache_setdefault(cache, facts, index_url, distribution, versions, value):
@@ -568,9 +567,11 @@ def _get_from_facts(facts, known_facts, index_url, distribution, requested_versi
def _with_absolute_url(d, index_url, distribution):
index_url_for_distro = "{}/{}/".format(index_url.rstrip("/"), distribution)
+ url = absolute_url(index_url = index_url_for_distro, url = d.url)
+ if d.url == url:
+ return d
# TODO @aignas 2026-02-08: think of a better way to do this
- # TODO @aignas 2026-02-08: if the url is absolute, return d
kwargs = dict()
for attr in [
"sha256",
@@ -584,7 +585,7 @@ def _with_absolute_url(d, index_url, distribution):
if hasattr(d, attr):
kwargs[attr] = getattr(d, attr)
if attr == "url":
- kwargs[attr] = absolute_url(index_url = index_url_for_distro, url = kwargs[attr])
+ kwargs[attr] = url
return struct(**kwargs)
From 27ea12d67e15ecc0ac2508e112593404354eaba6 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Wed, 11 Feb 2026 11:51:51 +0900
Subject: [PATCH 15/23] wip
---
python/private/pypi/parse_simpleapi_html.bzl | 15 ++----------
python/private/pypi/simpleapi_download.bzl | 25 ++++++++------------
2 files changed, 12 insertions(+), 28 deletions(-)
diff --git a/python/private/pypi/parse_simpleapi_html.bzl b/python/private/pypi/parse_simpleapi_html.bzl
index da11a77635..a450980950 100644
--- a/python/private/pypi/parse_simpleapi_html.bzl
+++ b/python/private/pypi/parse_simpleapi_html.bzl
@@ -16,14 +16,12 @@
Parse SimpleAPI HTML in Starlark.
"""
-def parse_simpleapi_html(*, url, content, distribution = None, return_absolute = True):
+def parse_simpleapi_html(*, content, distribution):
"""Get the package URLs for given shas by parsing the Simple API HTML.
Args:
- url(str): The URL that the HTML content can be downloaded from.
- distribution(str): TODO
+ distribution(str): Distribution name for which we are parsing the HTML.
content(str): The Simple API HTML content.
- return_absolute: {type}`bool` TODO
Returns:
A list of structs with:
@@ -35,9 +33,6 @@ def parse_simpleapi_html(*, url, content, distribution = None, return_absolute =
present, then the 'metadata_url' is also present. Defaults to "".
* metadata_url: The URL for the METADATA if we can download it. Defaults to "".
"""
- if not distribution:
- _, _, distribution = url.strip("/").rpartition("/")
-
sdists = {}
whls = {}
lines = content.split("
Date: Wed, 11 Feb 2026 11:54:31 +0900
Subject: [PATCH 16/23] wip
---
python/private/pypi/simpleapi_download.bzl | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index 4fd2ad1894..a263d508a1 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -184,6 +184,12 @@ If you would like to skip downloading metadata for these packages please add 'si
return contents
def _download_simpleapi(*, ctx, url, attr_envsubst, get_auth, **kwargs):
+ # NOTE @aignas 2026-02-11: If we return the `real_url` outside this function
+ # we may leak credentials to the lock file if the PyPI extension that this code
+ # is used in is not marked as `reproducible = True` or if we write facts.
+ #
+ # For this reason consider never doing `envsubst` outside this function or return
+ # `real_url`. If needed, do `envsubst` where you use the url to download stuff.
real_url = strip_empty_path_segments(envsubst(
url,
attr_envsubst,
@@ -302,7 +308,6 @@ def _read_simpleapi(ctx, index_url, distribution, attr, cache, requested_version
# NOTE @aignas 2024-03-31: some of the simple APIs use relative URLs for
# the whl location and we cannot handle multiple URLs at once by passing
# them to ctx.download if we want to correctly handle the relative URLs.
- # TODO: Add a test that env subbed index urls do not leak into the lock file.
cached = cache.get(index_url, distribution, requested_versions)
if cached:
From 94bea8b4e913983bec08e9b58c8c11c03444cefc Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Wed, 11 Feb 2026 11:57:06 +0900
Subject: [PATCH 17/23] add a note
---
python/private/pypi/simpleapi_download.bzl | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index a263d508a1..b728f4aff6 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -256,7 +256,11 @@ def _read(ctx, result, output):
if not result.success:
return result
- return struct(success = True, output = ctx.read(output))
+ contents = ctx.read(output)
+
+ # NOTE @aignas 2026-02-11: we are leaving the files there for debugging purposes. Usually
+ # after the module extension is finished evaluating, the files will be deleted anyway.
+ return struct(success = True, output = contents)
def strip_empty_path_segments(url):
"""Removes empty path segments from a URL. Does nothing for urls with no scheme.
From 39ee2fe02bd4f382fe854f5123d9b2606bf86212 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Wed, 11 Feb 2026 12:05:30 +0900
Subject: [PATCH 18/23] fixup tests
---
python/private/pypi/simpleapi_download.bzl | 4 ++
.../parse_simpleapi_html_tests.bzl | 44 +++++++++----------
2 files changed, 26 insertions(+), 22 deletions(-)
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index b728f4aff6..34af56d7ea 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -570,6 +570,10 @@ def _get_from_facts(facts, known_facts, index_url, distribution, requested_versi
return output
def _with_absolute_url(d, index_url, distribution):
+ if not hasattr(d, "url"):
+ # NOTE @aignas 2026-02-11: This is normally used in tests
+ return d
+
index_url_for_distro = "{}/{}/".format(index_url.rstrip("/"), distribution)
url = absolute_url(index_url = index_url_for_distro, url = d.url)
if d.url == url:
diff --git a/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl b/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl
index b96d02f990..06c490f9f8 100644
--- a/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl
+++ b/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl
@@ -52,7 +52,7 @@ def _test_sdist(env):
'data-requires-python=">=3.7"',
],
filename = "foo-0.0.1.tar.gz",
- url = "foo",
+ distribution = "foo",
),
struct(
filename = "foo-0.0.1.tar.gz",
@@ -69,7 +69,7 @@ def _test_sdist(env):
'data-requires-python=">=3.7"',
],
filename = "foo-0.0.1.tar.gz",
- url = "foo",
+ distribution = "foo",
),
struct(
filename = "foo-0.0.1.tar.gz",
@@ -83,7 +83,7 @@ def _test_sdist(env):
for (input, want) in tests:
html = _generate_html(input)
- got = parse_simpleapi_html(url = input.url, content = html)
+ got = parse_simpleapi_html(distribution = input.distribution, content = html)
env.expect.that_collection(got.sdists).has_size(1)
env.expect.that_collection(got.whls).has_size(0)
env.expect.that_collection(got.sha256s_by_version).has_size(1)
@@ -120,7 +120,7 @@ def _test_whls(env):
'data-core-metadata="sha256=deadb00f"',
],
filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
- url = "foo",
+ distribution = "foo",
),
struct(
filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
@@ -141,7 +141,7 @@ def _test_whls(env):
'data-core-metadata="sha256=deadb00f"',
],
filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
- url = "foo",
+ distribution = "foo",
),
struct(
filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
@@ -161,7 +161,7 @@ def _test_whls(env):
'data-core-metadata="sha256=deadb00f"',
],
filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
- url = "foo",
+ distribution = "foo",
),
struct(
filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
@@ -181,7 +181,7 @@ def _test_whls(env):
'data-dist-info-metadata="sha256=deadb00f"',
],
filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
- url = "foo",
+ distribution = "foo",
),
struct(
filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
@@ -200,7 +200,7 @@ def _test_whls(env):
'data-requires-python=">=3.7"',
],
filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
- url = "foo",
+ distribution = "foo",
),
struct(
filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
@@ -220,15 +220,15 @@ def _test_whls(env):
'data-dist-info-metadata="sha256=deadb00f"',
],
filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
- url = "https://example.org/python-wheels/bar/foo/",
+ distribution = "foo",
),
struct(
filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
metadata_sha256 = "deadb00f",
- metadata_url = "https://example.org/python-wheels/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata",
+ metadata_url = "../../foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata",
sha256 = "deadbeef",
version = "0.0.2",
- url = "https://example.org/python-wheels/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
+ url = "../../foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
yanked = False,
),
),
@@ -238,14 +238,14 @@ def _test_whls(env):
'href="/whl/torch-2.0.0-cp38-cp38-manylinux2014_aarch64.whl#sha256=deadbeef"',
],
filename = "torch-2.0.0-cp38-cp38-manylinux2014_aarch64.whl",
- url = "https://download.pytorch.org/whl/cpu/torch",
+ distribution = "torch",
),
struct(
filename = "torch-2.0.0-cp38-cp38-manylinux2014_aarch64.whl",
metadata_sha256 = "",
metadata_url = "",
sha256 = "deadbeef",
- url = "https://download.pytorch.org/whl/torch-2.0.0-cp38-cp38-manylinux2014_aarch64.whl",
+ url = "/whl/torch-2.0.0-cp38-cp38-manylinux2014_aarch64.whl",
version = "2.0.0",
yanked = False,
),
@@ -256,14 +256,14 @@ def _test_whls(env):
'href="/whl/torch-2.0.0-cp38-cp38-manylinux2014_aarch64.whl#sha256=notdeadbeef"',
],
filename = "torch-2.0.0-cp38-cp38-manylinux2014_aarch64.whl",
- url = "http://download.pytorch.org/whl/cpu/torch",
+ distribution = "torch",
),
struct(
filename = "torch-2.0.0-cp38-cp38-manylinux2014_aarch64.whl",
metadata_sha256 = "",
metadata_url = "",
sha256 = "notdeadbeef",
- url = "http://download.pytorch.org/whl/torch-2.0.0-cp38-cp38-manylinux2014_aarch64.whl",
+ url = "/whl/torch-2.0.0-cp38-cp38-manylinux2014_aarch64.whl",
version = "2.0.0",
yanked = False,
),
@@ -274,7 +274,7 @@ def _test_whls(env):
'href="1.0.0/mypy_extensions-1.0.0-py3-none-any.whl#sha256=deadbeef"',
],
filename = "mypy_extensions-1.0.0-py3-none-any.whl",
- url = "https://example.org/simple/mypy_extensions",
+ distribution = "mypy_extensions",
),
struct(
filename = "mypy_extensions-1.0.0-py3-none-any.whl",
@@ -282,7 +282,7 @@ def _test_whls(env):
metadata_url = "",
version = "1.0.0",
sha256 = "deadbeef",
- url = "https://example.org/simple/mypy_extensions/1.0.0/mypy_extensions-1.0.0-py3-none-any.whl",
+ url = "1.0.0/mypy_extensions-1.0.0-py3-none-any.whl",
yanked = False,
),
),
@@ -292,7 +292,7 @@ def _test_whls(env):
'href="unknown://example.com/mypy_extensions-1.0.0-py3-none-any.whl#sha256=deadbeef"',
],
filename = "mypy_extensions-1.0.0-py3-none-any.whl",
- url = "https://example.org/simple/mypy_extensions",
+ distribution = "mypy_extensions",
),
struct(
filename = "mypy_extensions-1.0.0-py3-none-any.whl",
@@ -300,7 +300,7 @@ def _test_whls(env):
metadata_url = "",
sha256 = "deadbeef",
version = "1.0.0",
- url = "https://example.org/simple/mypy_extensions/unknown://example.com/mypy_extensions-1.0.0-py3-none-any.whl",
+ url = "unknown://example.com/mypy_extensions-1.0.0-py3-none-any.whl",
yanked = False,
),
),
@@ -310,7 +310,7 @@ def _test_whls(env):
'href="/whl/cpu/torch-2.6.0%2Bcpu-cp39-cp39-manylinux_2_28_aarch64.whl#sha256=deadbeef"',
],
filename = "torch-2.6.0+cpu-cp39-cp39-manylinux_2_28_aarch64.whl",
- url = "https://example.org/",
+ distribution = "torch",
),
struct(
filename = "torch-2.6.0+cpu-cp39-cp39-manylinux_2_28_aarch64.whl",
@@ -319,7 +319,7 @@ def _test_whls(env):
sha256 = "deadbeef",
version = "2.6.0+cpu",
# A URL with % could occur if directly written in requirements.
- url = "https://example.org/whl/cpu/torch-2.6.0%2Bcpu-cp39-cp39-manylinux_2_28_aarch64.whl",
+ url = "/whl/cpu/torch-2.6.0%2Bcpu-cp39-cp39-manylinux_2_28_aarch64.whl",
yanked = False,
),
),
@@ -327,7 +327,7 @@ def _test_whls(env):
for (input, want) in tests:
html = _generate_html(input)
- got = parse_simpleapi_html(url = input.url, content = html)
+ got = parse_simpleapi_html(distribution = input.distribution, content = html)
env.expect.that_collection(got.sdists).has_size(0)
env.expect.that_collection(got.whls).has_size(1)
if not got:
From 3c06bd260691062e85fad0ebf2094f2091ac11b4 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Wed, 11 Feb 2026 12:06:06 +0900
Subject: [PATCH 19/23] wip
---
tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl b/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl
index 06c490f9f8..28e4773cd8 100644
--- a/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl
+++ b/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl
@@ -110,6 +110,8 @@ _tests.append(_test_sdist)
def _test_whls(env):
# buildifier: disable=unsorted-dict-items
+ # TODO @aignas 2026-02-11: remove some tests
+ # TODO @aignas 2026-02-11: add tests for absolute URLs
tests = [
(
struct(
From 81e77a6dc50bdb11115949bc893a0e1a87c3e0f2 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Wed, 11 Feb 2026 12:17:26 +0900
Subject: [PATCH 20/23] move the cache construction up
---
python/private/pypi/extension.bzl | 33 ++----------
python/private/pypi/hub_builder.bzl | 7 ---
python/private/pypi/simpleapi_download.bzl | 62 ++++++++++++++++------
3 files changed, 49 insertions(+), 53 deletions(-)
diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl
index bd384722fc..cc3e4a3786 100644
--- a/python/private/pypi/extension.bzl
+++ b/python/private/pypi/extension.bzl
@@ -27,7 +27,7 @@ load(":parse_whl_name.bzl", "parse_whl_name")
load(":pep508_env.bzl", "env")
load(":pip_repository_attrs.bzl", "ATTRS")
load(":platform.bzl", _plat = "platform")
-load(":simpleapi_download.bzl", "simpleapi_download")
+load(":simpleapi_download.bzl", "simpleapi_download", _simpleapi_cache = "simpleapi_cache")
load(":whl_library.bzl", "whl_library")
def _whl_mods_impl(whl_mods_dict):
@@ -224,8 +224,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
# dict[str repo, HubBuilder]
# See `hub_builder.bzl%hub_builder()` for `HubBuilder`
pip_hub_map = {}
- simpleapi_cache = {}
- facts = {}
+ simpleapi_cache = _simpleapi_cache(module_ctx)
for mod in module_ctx.modules:
for pip_attr in mod.tags.parse:
@@ -243,7 +242,6 @@ You cannot use both the additive_build_content and additive_build_content_file a
evaluate_markers_fn = kwargs.get("evaluate_markers", None),
available_interpreters = kwargs.get("available_interpreters", INTERPRETER_LABELS),
logger = repo_utils.logger(module_ctx, "pypi:hub:" + hub_name),
- facts = facts,
)
pip_hub_map[pip_attr.hub_name] = builder
elif pip_hub_map[hub_name].module_name != mod.name:
@@ -290,25 +288,6 @@ You cannot use both the additive_build_content and additive_build_content_file a
hub_group_map[hub.name] = out.group_map
hub_whl_map[hub.name] = out.whl_map
- facts = {
- "fact_version": facts.get("fact_version"),
- } | {
- index_url: {
- k: _sorted_dict(f.get(k))
- for k in [
- "dist_filenames",
- "dist_hashes",
- "dist_yanked",
- ]
- if f.get(k)
- }
- for index_url, f in facts.items()
- if index_url not in ["fact_version"]
- }
- if len(facts) == 1:
- # only version is present, skip writing
- facts = None
-
return struct(
config = config,
exposed_packages = exposed_packages,
@@ -317,7 +296,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
hub_whl_map = hub_whl_map,
whl_libraries = whl_libraries,
whl_mods = whl_mods,
- facts = facts,
+ facts = simpleapi_cache.get_facts(),
platform_config_settings = {
hub_name: {
platform_name: sorted([str(Label(cv)) for cv in p.config_settings])
@@ -327,12 +306,6 @@ You cannot use both the additive_build_content and additive_build_content_file a
},
)
-def _sorted_dict(d):
- if not d:
- return {}
-
- return {k: v for k, v in sorted(d.items())}
-
def _pip_impl(module_ctx):
"""Implementation of a class tag that creates the pip hub and corresponding pip spoke whl repositories.
diff --git a/python/private/pypi/hub_builder.bzl b/python/private/pypi/hub_builder.bzl
index 2bc12bf7b6..7510bcb587 100644
--- a/python/private/pypi/hub_builder.bzl
+++ b/python/private/pypi/hub_builder.bzl
@@ -31,7 +31,6 @@ def hub_builder(
simpleapi_download_fn,
evaluate_markers_fn,
logger,
- facts = None,
simpleapi_cache = {}):
"""Return a hub builder instance
@@ -48,7 +47,6 @@ def hub_builder(
used during the `repository_rule` and must be always compatible with the host.
simpleapi_download_fn: the function used to download from SimpleAPI.
simpleapi_cache: the cache for the download results.
- facts: the facts if they are available.
logger: the logger for this builder.
"""
@@ -91,10 +89,6 @@ def hub_builder(
# Functions to download according to the config
# dict[str python_version, callable]
_get_index_urls = {},
- # Contains the dict to store the facts to be written to the lockfile that
- # can be safely cached for future invocations.
- # dict[str, dict[str, str]]
- _facts = facts,
# Tells whether to use the downloader for a package.
# dict[str python_version, dict[str package_name, bool use_downloader]]
_use_downloader = {},
@@ -414,7 +408,6 @@ def _set_get_index_urls(self, pip_attr):
# Auth related info
netrc = pip_attr.netrc,
auth_patterns = pip_attr.auth_patterns,
- facts = self._facts,
),
cache = self._simpleapi_cache,
parallel_download = pip_attr.parallel_download,
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index 34af56d7ea..1ef00201ff 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -84,15 +84,11 @@ def simpleapi_download(
index_urls = [attr.index_url] + attr.extra_index_urls
read_simpleapi = read_simpleapi or _read_simpleapi
- if attr.facts:
- ctx.report_progress("Fetch package lists from PyPI index or read from MODULE.bazel.lock")
- else:
- ctx.report_progress("Fetch package lists from PyPI index")
+ ctx.report_progress("Fetch package lists from PyPI index")
- cache = simpleapi_cache(
- memory_cache = memory_cache(cache),
- facts_cache = facts_cache(getattr(ctx, "facts", None), attr.facts),
- )
+ if type(cache) == "dict":
+ # compatibility with some tests
+ cache = memory_cache(cache)
found_on_index = {}
warn_overrides = False
@@ -348,34 +344,68 @@ def _read_index_result(*, result, index_url, distribution, cache, requested_vers
cache.setdefault(index_url, distribution, requested_versions, output)
return struct(success = True, output = output)
-def simpleapi_cache(memory_cache, facts_cache):
+def simpleapi_cache(ctx, *, mcache = None, fcache = None):
"""SimpleAPI cache for making fewer calls.
Args:
- memory_cache: the storage to store things in memory.
- facts_cache: the storage to retrieve known facts.
+ ctx: the context to get the facts.
+ mcache: the storage to store things in memory.
+ fcache: the storage to retrieve known facts.
Returns:
- struct with 2 methods, `get` and `setdefault`.
+ struct with 2 methods, `get` and `setdefault` and a facts dictionary.
"""
+ mcache = mcache or memory_cache({})
+ facts = {}
+ fcache = fcache or facts_cache(getattr(ctx, "facts", None), facts)
+
return struct(
get = lambda index_url, distribution, versions: _cache_get(
- memory_cache,
- facts_cache,
+ mcache,
+ fcache,
index_url,
distribution,
versions,
),
setdefault = lambda index_url, distribution, versions, value: _cache_setdefault(
- memory_cache,
- facts_cache,
+ mcache,
+ fcache,
index_url,
distribution,
versions,
value,
),
+ get_facts = lambda: _get_facts(facts),
)
+def _get_facts(facts):
+ facts = {
+ "fact_version": facts.get("fact_version"),
+ } | {
+ index_url: {
+ k: _sorted_dict(f.get(k))
+ for k in [
+ "dist_filenames",
+ "dist_hashes",
+ "dist_yanked",
+ ]
+ if f.get(k)
+ }
+ for index_url, f in facts.items()
+ if index_url not in ["fact_version"]
+ }
+ if len(facts) == 1:
+ # only version is present, skip writing
+ facts = None
+
+ return facts
+
+def _sorted_dict(d):
+ if not d:
+ return {}
+
+ return {k: v for k, v in sorted(d.items())}
+
def _cache_get(cache, facts, index_url, distribution, versions):
if not facts:
return cache.get(index_url, distribution, versions)
From 8e9d2768b433a939ea1bd4b4dd32c0416079906f Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Wed, 11 Feb 2026 12:19:12 +0900
Subject: [PATCH 21/23] wip
---
python/private/pypi/simpleapi_download.bzl | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index 1ef00201ff..0a61ff5d3c 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -350,10 +350,11 @@ def simpleapi_cache(ctx, *, mcache = None, fcache = None):
Args:
ctx: the context to get the facts.
mcache: the storage to store things in memory.
- fcache: the storage to retrieve known facts.
+ fcache: the storage to retrieve known facts from the lock file.
Returns:
- struct with 2 methods, `get` and `setdefault` and a facts dictionary.
+ struct with 3 methods, `get`, `setdefault`, `get_facts` for writing
+ back to the lock file.
"""
mcache = mcache or memory_cache({})
facts = {}
From cc7612e458ec998dc1f1cfc52e041219a2dddef7 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Wed, 11 Feb 2026 23:52:32 +0900
Subject: [PATCH 22/23] wip
---
python/private/pypi/simpleapi_download.bzl | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index 0a61ff5d3c..f326cd3856 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -341,6 +341,10 @@ def _read_index_result(*, result, index_url, distribution, cache, requested_vers
if not output:
return struct(success = False)
+ # Set the value for no versions
+ cache.setdefault(index_url, distribution, None, output)
+
+ # Set the value for requested versions as well
cache.setdefault(index_url, distribution, requested_versions, output)
return struct(success = True, output = output)
From 38447780c75661bf8de20eca321b52b126fa32f3 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Fri, 13 Feb 2026 08:27:42 +0900
Subject: [PATCH 23/23] wip
---
python/private/pypi/hub_builder.bzl | 6 +-----
python/private/pypi/simpleapi_download.bzl | 6 +-----
2 files changed, 2 insertions(+), 10 deletions(-)
diff --git a/python/private/pypi/hub_builder.bzl b/python/private/pypi/hub_builder.bzl
index 7510bcb587..7d84e9cb7b 100644
--- a/python/private/pypi/hub_builder.bzl
+++ b/python/private/pypi/hub_builder.bzl
@@ -395,11 +395,7 @@ def _set_get_index_urls(self, pip_attr):
index_url = pip_attr.experimental_index_url,
extra_index_urls = pip_attr.experimental_extra_index_urls or [],
index_url_overrides = pip_attr.experimental_index_url_overrides or {},
- sources = [
- d
- for d in distributions
- if _use_downloader(self, python_version, d)
- ] if type(distributions) == "list" else {
+ sources = {
d: versions
for d, versions in distributions.items()
if _use_downloader(self, python_version, d)
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index f326cd3856..0fa42d0697 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -93,11 +93,7 @@ def simpleapi_download(
found_on_index = {}
warn_overrides = False
- # Normalize the inputs
- if type(attr.sources) == "list":
- fail("TODO")
- else:
- input_sources = attr.sources
+ input_sources = attr.sources
for i, index_url in enumerate(index_urls):
if i != 0: