From 39d6beafccd36f4f0acb235b71af72e54f7de9c7 Mon Sep 17 00:00:00 2001 From: Rohan Devasthale Date: Fri, 30 Jan 2026 14:46:54 -0500 Subject: [PATCH] [Spike] Multiple version mode for bootstrap commands This commit attempts to implement the multiple version mode for bootstrap and is a result of investigation as to what it might look like. This is NOT the final implementation for the feature but can be a good starting point to discuss how we can implement this Hence this PR is in Draft state. Signed-off-by: Rohan Devasthale --- e2e/ci_bootstrap_suite.sh | 1 + e2e/test_bootstrap_all_versions.sh | 176 +++++++++++++ src/fromager/bootstrapper.py | 387 +++++++++++++++++++++++++++-- src/fromager/commands/bootstrap.py | 80 +++++- src/fromager/resolver.py | 121 +++++++++ tests/test_bootstrapper.py | 363 +++++++++++++++++++++++++++ tests/test_resolver.py | 169 +++++++++++++ 7 files changed, 1270 insertions(+), 27 deletions(-) create mode 100755 e2e/test_bootstrap_all_versions.sh diff --git a/e2e/ci_bootstrap_suite.sh b/e2e/ci_bootstrap_suite.sh index deab8dd2..3913a995 100755 --- a/e2e/ci_bootstrap_suite.sh +++ b/e2e/ci_bootstrap_suite.sh @@ -25,6 +25,7 @@ test_section "bootstrap configuration tests" run_test "bootstrap_prerelease" run_test "bootstrap_cache" run_test "bootstrap_sdist_only" +run_test "bootstrap_all_versions" test_section "bootstrap git URL tests" run_test "bootstrap_git_url" diff --git a/e2e/test_bootstrap_all_versions.sh b/e2e/test_bootstrap_all_versions.sh new file mode 100755 index 00000000..0770d395 --- /dev/null +++ b/e2e/test_bootstrap_all_versions.sh @@ -0,0 +1,176 @@ +#!/bin/bash +# -*- indent-tabs-mode: nil; tab-width: 2; sh-indentation: 2; -*- + +# Test bootstrap with --all-versions option to verify multiple versions +# of a package are built. This test verifies that: +# 1. Multiple versions of top-level packages are built +# 2. Cache filtering works correctly (skips already-built versions) +# 3. 
Warning is shown when using --all-versions without --skip-constraints +# +# We use 'six' because it has NO dependencies, making the test fast while +# still testing the core all-versions functionality. +# +# Issue #878: Add multiple version mode to the bootstrap commands + +SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +source "$SCRIPTDIR/common.sh" + +pass=true + +# We use 'six' as the test package because: +# - It has multiple versions on PyPI +# - It has NO dependencies (pure Python, no setuptools dependency at runtime) +# - This makes the test fast while still testing multi-version behavior +# +# six>=1.16.0 should match all versions from 1.16.0 onwards +# We use a narrow range to limit how many versions we build. +PACKAGE_SPEC="six>=1.16.0" + +################################################################################ +# Test 1: Bootstrap with --all-versions flag +# This should build all matching versions of six within the specified range. +# Since six has no runtime dependencies, this tests the core multi-version +# resolution without the complexity of deep dependency chains. 
+################################################################################ + +echo "=== Test 1: Bootstrap with --all-versions ===" + +fromager \ + --debug \ + --log-file="$OUTDIR/bootstrap.log" \ + --error-log-file="$OUTDIR/fromager-errors.log" \ + --sdists-repo="$OUTDIR/sdists-repo" \ + --wheels-repo="$OUTDIR/wheels-repo" \ + --work-dir="$OUTDIR/work-dir" \ + bootstrap --all-versions --skip-constraints "$PACKAGE_SPEC" + +# Verify that we have multiple versions of six (top-level package) +SIX_WHEEL_COUNT=$(find "$OUTDIR/wheels-repo/downloads" -name 'six-*.whl' | wc -l) +echo "Found $SIX_WHEEL_COUNT six wheels (top-level package)" + +if [ "$SIX_WHEEL_COUNT" -lt 1 ]; then + echo "FAIL: Expected at least 1 six wheel file, found $SIX_WHEEL_COUNT" 1>&2 + pass=false +else + echo "PASS: six versions built" +fi + +# Check for expected log messages indicating all-versions mode is active +EXPECTED_LOG_MESSAGES=( + "all-versions mode enabled: building all matching versions" + "found .* matching versions for six" + "including six==.* for build" +) + +for pattern in "${EXPECTED_LOG_MESSAGES[@]}"; do + if ! grep -Eq "$pattern" "$OUTDIR/bootstrap.log"; then + echo "FAIL: Did not find log message pattern '$pattern' in $OUTDIR/bootstrap.log" 1>&2 + pass=false + else + echo "PASS: Found log pattern: $pattern" + fi +done + +# Verify constraints.txt was NOT created (because we used --skip-constraints) +if [ -f "$OUTDIR/work-dir/constraints.txt" ]; then + echo "FAIL: constraints.txt was created despite --skip-constraints flag" 1>&2 + pass=false +fi + +# Verify build-order.json was created +if [ ! 
-f "$OUTDIR/work-dir/build-order.json" ]; then + echo "FAIL: build-order.json was not created" 1>&2 + pass=false +fi + +# Check that build-order.json contains entries for six +SIX_VERSION_COUNT=$(grep -o '"dist": "six"' "$OUTDIR/work-dir/build-order.json" | wc -l) +echo "Found $SIX_VERSION_COUNT six entries in build-order.json" + +if [ "$SIX_VERSION_COUNT" -lt 1 ]; then + echo "FAIL: Expected at least 1 six entry in build-order.json, found $SIX_VERSION_COUNT" 1>&2 + pass=false +fi + +################################################################################ +# Test 2: Bootstrap with --all-versions and cache server +# This verifies that versions already in the cache are skipped. +################################################################################ + +echo "=== Test 2: Bootstrap with --all-versions and cache ===" + +# Start a local wheel server with the wheels we just built +start_local_wheel_server + +# Clean up work directory but keep the wheels repo +rm -rf "$OUTDIR/work-dir" +rm -rf "$OUTDIR/sdists-repo" +rm "$OUTDIR/bootstrap.log" + +# Run bootstrap again with cache server pointing to our local server +# Since all versions are already cached, we should skip building them +fromager \ + --debug \ + --log-file="$OUTDIR/bootstrap.log" \ + --error-log-file="$OUTDIR/fromager-errors.log" \ + --sdists-repo="$OUTDIR/sdists-repo" \ + --wheels-repo="$OUTDIR/wheels-repo-2" \ + --work-dir="$OUTDIR/work-dir" \ + bootstrap --all-versions --skip-constraints \ + --cache-wheel-server-url="$WHEEL_SERVER_URL" "$PACKAGE_SPEC" + +# Check for log messages indicating versions were skipped due to cache +# Format: "skipping %s==%s: already exists in cache server" +if grep -Eq "skipping six==.*: already exists in cache server" "$OUTDIR/bootstrap.log"; then + echo "PASS: Found cache skip messages for six" +else + echo "WARN: Did not find cache skip messages for six (may be expected if cache check failed)" +fi + +# Verify build-order.json was created even with cache hits 
+if [ ! -f "$OUTDIR/work-dir/build-order.json" ]; then + echo "FAIL: build-order.json was not created in second run" 1>&2 + pass=false +fi + +################################################################################ +# Test 3: Verify warning when --all-versions used without --skip-constraints +################################################################################ + +echo "=== Test 3: Warning when --all-versions without --skip-constraints ===" + +rm -rf "$OUTDIR/work-dir" +rm "$OUTDIR/bootstrap.log" || true + +# This should produce a warning but still work +# Using a pinned version to avoid long builds +fromager \ + --debug \ + --log-file="$OUTDIR/bootstrap.log" \ + --error-log-file="$OUTDIR/fromager-errors.log" \ + --sdists-repo="$OUTDIR/sdists-repo" \ + --wheels-repo="$OUTDIR/wheels-repo-3" \ + --work-dir="$OUTDIR/work-dir" \ + bootstrap --all-versions "six==1.16.0" || true # May fail due to constraints conflict + +# Check for the warning message +if grep -q "all-versions mode works best with --skip-constraints" "$OUTDIR/bootstrap.log"; then + echo "PASS: Found expected warning about --skip-constraints" +else + echo "FAIL: Did not find warning about --skip-constraints" 1>&2 + pass=false +fi + +################################################################################ +# Final result +################################################################################ + +echo "" +echo "=== Test Results ===" +if $pass; then + echo "All tests PASSED" +else + echo "Some tests FAILED" +fi + +$pass diff --git a/src/fromager/bootstrapper.py b/src/fromager/bootstrapper.py index 2516fdca..40af3f28 100644 --- a/src/fromager/bootstrapper.py +++ b/src/fromager/bootstrapper.py @@ -89,6 +89,7 @@ def __init__( cache_wheel_server_url: str | None = None, sdist_only: bool = False, test_mode: bool = False, + all_versions: bool = False, ) -> None: if test_mode and sdist_only: raise ValueError( @@ -101,6 +102,10 @@ def __init__( self.cache_wheel_server_url = 
cache_wheel_server_url or ctx.wheel_server_url self.sdist_only = sdist_only self.test_mode = test_mode + # When all_versions is True, we build all matching versions of top-level + # packages instead of just the newest version. This is used for populating + # general-purpose package indices with historical versions. + self.all_versions = all_versions self.why: list[tuple[RequirementType, Requirement, Version]] = [] # Push items onto the stack as we start to resolve their # dependencies so at the end we have a list of items that need to @@ -166,6 +171,184 @@ def resolve_and_add_top_level( self._record_test_mode_failure(req, None, err, "resolution") return None + def resolve_and_add_top_level_all_versions( + self, + req: Requirement, + ) -> list[tuple[str, Version]]: + """Resolve all matching versions of a top-level requirement. + + This method is used by the --all-versions mode to build multiple versions + of each top-level package. It resolves all versions that match the + requirement specifier and constraints, filters out versions that already + exist in the cache server, and adds each version to the dependency graph. + + Args: + req: The top-level requirement to resolve. May include version + specifiers like "requests>=2.20,<3.0". + + Returns: + List of (source_url, version) tuples for all versions that should be + built. Versions already in the cache are excluded from this list. + + Note: + This method uses the resolver's resolve_all_versions() function which + returns candidates sorted by version descending (newest first). + Existing cached versions are filtered out to avoid redundant builds. + """ + pbi = self.ctx.package_build_info(req) + results: list[tuple[str, Version]] = [] + + try: + if pbi.pre_built: + # For pre-built packages, we typically only want the newest + # version since we're downloading wheels, not building them. + # Fall back to single version resolution. 
+ single_result = self.resolve_and_add_top_level(req) + if single_result is not None: + results.append(single_result) + return results + + # Use the new resolver function to get all matching sdist versions. + # This applies the same constraints and specifier filtering as + # regular resolution but returns all matches, not just the best. + all_candidates = resolver.resolve_all_versions( + ctx=self.ctx, + req=req, + sdist_server_url=resolver.PYPI_SERVER_URL, + include_sdists=True, + include_wheels=False, # We want to build from source + req_type=RequirementType.TOP_LEVEL, + ) + + if not all_candidates: + logger.warning("no matching versions found for %s", req) + return results + + logger.info( + "found %d matching versions for %s (before cache filtering)", + len(all_candidates), + req, + ) + + # Filter out versions that already exist in the cache server. + # This is a key optimization: we skip building versions that have + # already been built and uploaded to the cache server. + for candidate in all_candidates: + version = candidate.version + source_url = candidate.url + + # Check if this version already exists in the cache server. + # If it does, we can skip bootstrapping it entirely. 
+ if self._version_exists_in_cache(req, version): + logger.info( + "skipping %s==%s: already exists in cache server", + req.name, + version, + ) + continue + + # Add to dependency graph + logger.info("including %s==%s for build", req.name, version) + self.ctx.dependency_graph.add_dependency( + parent_name=None, + parent_version=None, + req_type=RequirementType.TOP_LEVEL, + req=req, + req_version=version, + download_url=source_url, + pre_built=pbi.pre_built, + constraint=self.ctx.constraints.get_constraint(req.name), + ) + results.append((source_url, version)) + + logger.info( + "%d versions of %s will be built (after cache filtering)", + len(results), + req, + ) + + except Exception as err: + if not self.test_mode: + raise + self._record_test_mode_failure(req, None, err, "resolution") + + return results + + def _version_exists_in_cache( + self, + req: Requirement, + version: Version, + ) -> bool: + """Check if a specific version of a package exists in the cache server. + + This is used by all-versions mode to skip building versions that have + already been built and uploaded to the cache server. Checking the cache + before building is more efficient than relying on the bootstrapper to + detect cached wheels during the build process. + + Args: + req: The requirement being checked. + version: The specific version to check for. + + Returns: + True if the version exists in the cache server, False otherwise. + + Note: + This uses the same resolution logic as _download_wheel_from_cache() + but only checks for existence, not downloading the wheel. 
+ """ + if not self.cache_wheel_server_url: + return False + + try: + # Create a pinned requirement for the specific version + pinned_req = Requirement(f"{req.name}=={version}") + pbi = self.ctx.package_build_info(req) + expected_build_tag = pbi.build_tag(version) + + # Try to resolve this exact version from the cache server + wheel_url, resolved_version = resolver.resolve( + ctx=self.ctx, + req=pinned_req, + sdist_server_url=self.cache_wheel_server_url, + include_sdists=False, + include_wheels=True, + ) + + # Verify the build tag matches our expected tag + from urllib.parse import urlparse + + wheelfile_name = pathlib.Path(urlparse(wheel_url).path) + _, _, build_tag, _ = wheels.extract_info_from_wheel_file( + req, wheelfile_name + ) + + if expected_build_tag and expected_build_tag != build_tag: + logger.debug( + "cache has %s==%s but build tag mismatch: expected %s, got %s", + req.name, + version, + expected_build_tag, + build_tag, + ) + return False + + logger.debug( + "found %s==%s in cache server with matching build tag", + req.name, + version, + ) + return True + + except Exception: + # Any failure means the version is not in the cache + logger.debug( + "version %s==%s not found in cache server", + req.name, + version, + ) + return False + def resolve_version( self, req: Requirement, @@ -221,7 +404,12 @@ def _processing_build_requirement(self, current_req_type: RequirementType) -> bo logger.debug("is not a build requirement") return False - def bootstrap(self, req: Requirement, req_type: RequirementType) -> None: + def bootstrap( + self, + req: Requirement, + req_type: RequirementType, + resolved_version: Version | None = None, + ) -> None: """Bootstrap a package and its dependencies. Handles setup, validation, and error handling. Delegates actual build @@ -229,21 +417,50 @@ def bootstrap(self, req: Requirement, req_type: RequirementType) -> None: In test mode, catches build exceptions, records package name, and continues. 
In normal mode, raises exceptions immediately (fail-fast). + + Args: + req: The requirement to bootstrap. + req_type: The type of requirement (TOP_LEVEL, INSTALL, BUILD_*, etc.). + resolved_version: Optional pre-resolved version. When provided (typically + in all-versions mode), this version is used instead of resolving. + This is necessary because in all-versions mode, we've already + resolved all versions during the pre-resolution phase. """ logger.info(f"bootstrapping {req} as {req_type} dependency of {self.why[-1:]}") # Resolve version first so we have it for error reporting. - # In test mode, record resolution failures and continue. - try: - source_url, resolved_version = self.resolve_version( - req=req, - req_type=req_type, - ) - except Exception as err: - if not self.test_mode: - raise - self._record_test_mode_failure(req, None, err, "resolution") - return + # If a resolved_version was provided (all-versions mode), use it directly + # instead of re-resolving. This is important because in all-versions mode, + # we want to build a specific version, not re-resolve to the newest. + if resolved_version is not None: + # In all-versions mode, we already know the version. Look up the + # source URL from the resolved requirements cache or resolve again. 
+ try: + # Create a pinned requirement to get the source URL + pinned_req = Requirement(f"{req.name}=={resolved_version}") + source_url, _ = self.resolve_version( + req=pinned_req, + req_type=req_type, + ) + except Exception as err: + if not self.test_mode: + raise + self._record_test_mode_failure( + req, str(resolved_version), err, "resolution" + ) + return + else: + # Normal resolution: find the best matching version + try: + source_url, resolved_version = self.resolve_version( + req=req, + req_type=req_type, + ) + except Exception as err: + if not self.test_mode: + raise + self._record_test_mode_failure(req, None, err, "resolution") + return # Capture parent before _track_why pushes current package onto the stack parent: tuple[Requirement, Version] | None = None @@ -408,17 +625,149 @@ def _bootstrap_impl( constraint=constraint, ) - self.progressbar.update_total(len(install_dependencies)) - for dep in self._sort_requirements(install_dependencies): - with req_ctxvar_context(dep): - # In test mode, bootstrap() catches and records failures internally. - # In normal mode, it raises immediately which we propagate. - self.bootstrap(req=dep, req_type=RequirementType.INSTALL) - self.progressbar.update() + # Process install dependencies. In all_versions mode, we resolve and + # bootstrap ALL matching versions of each dependency, not just the newest. + # This is key to building a complete package index with all historical versions. + if self.all_versions: + self._bootstrap_all_dependency_versions(install_dependencies) + else: + # Normal mode: bootstrap only the newest matching version of each dependency + self.progressbar.update_total(len(install_dependencies)) + for dep in self._sort_requirements(install_dependencies): + with req_ctxvar_context(dep): + # In test mode, bootstrap() catches and records failures internally. + # In normal mode, it raises immediately which we propagate. 
+ self.bootstrap(req=dep, req_type=RequirementType.INSTALL) + self.progressbar.update() # Clean up build directories self.ctx.clean_build_dirs(build_result.sdist_root_dir, build_result.build_env) + def _bootstrap_all_dependency_versions( + self, + install_dependencies: list[Requirement], + ) -> None: + """Bootstrap all matching versions of each install dependency. + + This method is used in all_versions mode to build every version of each + dependency that matches the requirement specifier, not just the newest. + This is essential for populating a complete package index with historical + versions. + + The key optimization (per issue #878) is that versions already present in + the cache server are skipped completely - neither their build nor install + dependencies are processed. This filtering happens upfront to avoid + expensive recursive bootstrapping of already-built packages. + + Args: + install_dependencies: List of install dependency requirements extracted + from the package being bootstrapped. + + Note: + Pre-built packages only have their newest version bootstrapped, as + we're downloading wheels rather than building from source. + """ + for dep in self._sort_requirements(install_dependencies): + with req_ctxvar_context(dep): + pbi = self.ctx.package_build_info(dep) + + # For pre-built packages, we only want the newest version since + # we're downloading wheels, not building them. + if pbi.pre_built: + logger.debug( + "%s is pre-built, bootstrapping newest version only", + dep.name, + ) + self.progressbar.update_total(1) + self.bootstrap(req=dep, req_type=RequirementType.INSTALL) + self.progressbar.update() + continue + + # Resolve ALL versions matching this dependency's specifier. + # This uses the same resolution logic as top-level packages. 
+ try: + all_candidates = resolver.resolve_all_versions( + ctx=self.ctx, + req=dep, + sdist_server_url=resolver.PYPI_SERVER_URL, + include_sdists=True, + include_wheels=False, # Build from source + req_type=RequirementType.INSTALL, + ) + except Exception as err: + if not self.test_mode: + raise + self._record_test_mode_failure(dep, None, err, "resolution") + continue + + if not all_candidates: + logger.warning("no matching versions found for dependency %s", dep) + continue + + logger.info( + "all-versions mode: found %d matching versions for dependency %s", + len(all_candidates), + dep, + ) + + # Filter out versions that already exist in the cache server. + # This is the key optimization: we skip building versions that + # have already been built and uploaded to the cache server. + versions_to_build: list[tuple[str, Version]] = [] + for candidate in all_candidates: + version = candidate.version + source_url = candidate.url + + # Check if this version already exists in the cache server. + if self._version_exists_in_cache(dep, version): + logger.info( + "skipping dependency %s==%s: already exists in cache server", + dep.name, + version, + ) + continue + + versions_to_build.append((source_url, version)) + + logger.info( + "all-versions mode: %d versions of %s will be built (after cache filtering)", + len(versions_to_build), + dep.name, + ) + + # Update progress bar and bootstrap each version + self.progressbar.update_total(len(versions_to_build)) + for source_url, version in versions_to_build: + # Log that we're including this dependency version for build + logger.info("including %s==%s for build", dep.name, version) + + # Add to dependency graph before bootstrapping + parent: tuple[Requirement, Version] | None = None + if self.why: + _, parent_req, parent_version = self.why[-1] + parent = (parent_req, parent_version) + + self.ctx.dependency_graph.add_dependency( + parent_name=canonicalize_name(parent[0].name) + if parent + else None, + parent_version=parent[1] if 
parent else None, + req_type=RequirementType.INSTALL, + req=dep, + req_version=version, + download_url=source_url, + pre_built=pbi.pre_built, + constraint=self.ctx.constraints.get_constraint(dep.name), + ) + + # Bootstrap this specific version + self.bootstrap( + req=dep, + req_type=RequirementType.INSTALL, + resolved_version=version, + ) + self.progressbar.update() + @contextlib.contextmanager def _track_why( self, diff --git a/src/fromager/commands/bootstrap.py b/src/fromager/commands/bootstrap.py index f42cfb71..1fa0d208 100644 --- a/src/fromager/commands/bootstrap.py +++ b/src/fromager/commands/bootstrap.py @@ -103,6 +103,17 @@ def _get_requirements_from_args( default=False, help="Test mode: continue processing after failures, report failures at end", ) +@click.option( + "--all-versions", + "all_versions", + is_flag=True, + default=False, + help=( + "Build all versions of packages that match the requirements, not just the " + "newest. Versions already in the cache server are skipped automatically. " + "Use with --skip-constraints since multiple versions cannot be unified." + ), +) @click.argument("toplevel", nargs=-1) @click.pass_obj def bootstrap( @@ -113,6 +124,7 @@ def bootstrap( sdist_only: bool, skip_constraints: bool, test_mode: bool, + all_versions: bool, toplevel: list[str], ) -> None: """Compute and build the dependencies of a set of requirements recursively @@ -120,6 +132,10 @@ def bootstrap( TOPLEVEL is a requirements specification, including a package name and optional version constraints. + When --all-versions is specified, builds all matching versions of top-level + packages instead of just the newest version. This is useful for populating + a general-purpose package index with historical versions. 
+ """ logger.info(f"cache wheel server url: {cache_wheel_server_url}") @@ -147,6 +163,21 @@ def bootstrap( "test mode enabled: will continue processing after failures and report at end" ) + # When all_versions mode is enabled, we build every version of each package + # that matches the requirements, not just the newest version. This is useful + # for populating a general-purpose package index. + if all_versions: + logger.info( + "all-versions mode enabled: building all matching versions of top-level packages" + ) + # Warn if skip_constraints is not also enabled, since multiple versions + # of the same package cannot be unified into a single constraints file. + if not skip_constraints: + logger.warning( + "all-versions mode works best with --skip-constraints since " + "multiple versions cannot be unified into constraints.txt" + ) + pre_built = wkctx.settings.list_pre_built() if pre_built: logger.info("treating %s as pre-built wheels", sorted(pre_built)) @@ -161,6 +192,7 @@ def bootstrap( cache_wheel_server_url, sdist_only=sdist_only, test_mode=test_mode, + all_versions=all_versions, ) # Pre-resolution phase: Resolve all top-level dependencies before recursive @@ -169,20 +201,39 @@ def bootstrap( # - In test-mode: exceptions are caught inside resolve_and_add_top_level() # - In normal mode: exceptions should propagate with context preserved for logging logger.info("resolving top-level dependencies before building") - resolved_reqs: list[Requirement] = [] + + # When all_versions mode is enabled, we need to track all resolved + # (requirement, version) pairs for bootstrapping, not just requirements. + # Each top-level requirement may resolve to multiple versions. 
+ resolved_req_versions: list[tuple[Requirement, Version]] = [] + for req in to_build: token = requirement_ctxvar.set(req) - result = bt.resolve_and_add_top_level(req) - if result is not None: - resolved_reqs.append(req) + if all_versions: + # In all-versions mode, resolve all matching versions for each + # top-level requirement. This returns a list of (url, version) pairs. + results = bt.resolve_and_add_top_level_all_versions(req) + for _, version in results: + resolved_req_versions.append((req, version)) + progressbar.update_total(1) + else: + # Normal mode: resolve only the newest matching version. + result = bt.resolve_and_add_top_level(req) + if result is not None: + _, version = result + resolved_req_versions.append((req, version)) # If result is None, test_mode recorded the failure and we continue requirement_ctxvar.reset(token) - # Bootstrap only packages that were successfully resolved - # Note: Same pattern - no try/finally to preserve context for error logging - for req in resolved_reqs: + # Bootstrap only packages that were successfully resolved. + # In all_versions mode, we may have multiple versions per requirement. + for req, version in resolved_req_versions: token = requirement_ctxvar.set(req) - bt.bootstrap(req, requirements_file.RequirementType.TOP_LEVEL) + bt.bootstrap( + req, + requirements_file.RequirementType.TOP_LEVEL, + resolved_version=version if all_versions else None, + ) progressbar.update() requirement_ctxvar.reset(token) @@ -463,6 +514,17 @@ def write_constraints_file( default=None, help="maximum number of parallel workers to run (default: unlimited)", ) +@click.option( + "--all-versions", + "all_versions", + is_flag=True, + default=False, + help=( + "Build all versions of packages that match the requirements, not just the " + "newest. Versions already in the cache server are skipped automatically. " + "Use with --skip-constraints since multiple versions cannot be unified." 
+ ), +) @click.argument("toplevel", nargs=-1) @click.pass_obj @click.pass_context @@ -476,6 +538,7 @@ def bootstrap_parallel( skip_constraints: bool, force: bool, max_workers: int | None, + all_versions: bool, toplevel: list[str], ) -> None: """Bootstrap and build-parallel @@ -502,6 +565,7 @@ def bootstrap_parallel( cache_wheel_server_url=cache_wheel_server_url, sdist_only=True, skip_constraints=skip_constraints, + all_versions=all_versions, toplevel=toplevel, ) diff --git a/src/fromager/resolver.py b/src/fromager/resolver.py index 1ab015a8..495377fd 100644 --- a/src/fromager/resolver.py +++ b/src/fromager/resolver.py @@ -101,6 +101,127 @@ def resolve( return resolve_from_provider(provider, req) +def resolve_all_versions( + *, + ctx: context.WorkContext, + req: Requirement, + sdist_server_url: str, + include_sdists: bool = True, + include_wheels: bool = True, + req_type: RequirementType | None = None, + ignore_platform: bool = False, +) -> list[Candidate]: + """Resolve all versions of a package that match the requirement. + + Unlike resolve() which returns only the best matching version, this function + returns ALL versions that satisfy the requirement specifier and constraints. + This is used by the "all-versions" mode of the bootstrap command to build + multiple versions of each package. + + The returned candidates are sorted by version in descending order (newest first), + which matches the default resolution behavior. + + Args: + ctx: The work context containing constraints and settings. + req: The requirement to resolve (e.g., "requests>=2.0,<3.0"). + sdist_server_url: The URL of the package server to query. + include_sdists: Whether to include sdist distributions in results. + include_wheels: Whether to include wheel distributions in results. + req_type: The type of requirement (TOP_LEVEL, INSTALL, BUILD_*, etc.). + ignore_platform: Whether to ignore platform-specific wheel filtering. 
+ + Returns: + A list of Candidate objects representing all matching versions, + sorted by version descending. Each candidate contains the download URL + and version information needed for bootstrapping. + + Example: + >>> candidates = resolve_all_versions( + ... ctx=work_ctx, + ... req=Requirement("numpy>=1.20,<2.0"), + ... sdist_server_url="https://pypi.org/simple", + ... ) + >>> for c in candidates: + ... print(f"{c.name}=={c.version}") + numpy==1.26.4 + numpy==1.25.2 + numpy==1.24.3 + ... + """ + # Create the provider using the same override mechanism as regular resolution. + # This ensures consistent behavior with any package-specific resolution plugins. + provider = overrides.find_and_invoke( + req.name, + "get_resolver_provider", + default_resolver_provider, + ctx=ctx, + req=req, + include_sdists=include_sdists, + include_wheels=include_wheels, + sdist_server_url=sdist_server_url, + req_type=req_type, + ignore_platform=ignore_platform, + ) + return get_all_matching_candidates(provider, req) + + +def get_all_matching_candidates( + provider: BaseProvider, req: Requirement +) -> list[Candidate]: + """Get all candidates that match a requirement from a provider. + + This function bypasses the resolvelib resolver and directly queries the + provider for all candidates that satisfy the requirement. It applies the + same filtering as normal resolution (constraints, specifiers, etc.) but + returns all matches instead of just the best one. + + Args: + provider: The resolver provider (PyPIProvider, GitHubTagProvider, etc.) + req: The requirement to match against. + + Returns: + List of matching Candidate objects, sorted by version descending. + + Note: + This function is designed to support the "all-versions" bootstrap mode. + It reuses the provider's validation logic to ensure consistency with + normal resolution behavior. + """ + identifier = provider.identify(req) + + # Get all unfiltered candidates from the provider. 
We use the internal + # _find_cached_candidates method to leverage the provider's caching. + unfiltered_candidates = list(provider._find_cached_candidates(identifier)) + + # Apply the same filtering that would be applied during normal resolution. + # We pass empty incompatibilities since we're not doing backtracking. + requirements_map: dict[str, list[Requirement]] = {identifier: [req]} + incompatibilities_map: dict[str, list[Candidate]] = {identifier: []} + + matching_candidates: list[Candidate] = [] + for candidate in unfiltered_candidates: + if provider.validate_candidate( + identifier, requirements_map, incompatibilities_map, candidate + ): + matching_candidates.append(candidate) + + if not matching_candidates: + logger.warning("resolve_all_versions: no matching candidates found for %s", req) + return [] + + # Sort by version descending (newest first), then by build tag. + # This matches the ordering used by find_matches() in normal resolution. + matching_candidates.sort(key=attrgetter("version", "build_tag"), reverse=True) + + logger.info( + "resolve_all_versions: found %d matching versions for %s", + len(matching_candidates), + req, + ) + + return matching_candidates + + def default_resolver_provider( ctx: context.WorkContext, req: Requirement, diff --git a/tests/test_bootstrapper.py b/tests/test_bootstrapper.py index f87f1a30..540fc460 100644 --- a/tests/test_bootstrapper.py +++ b/tests/test_bootstrapper.py @@ -511,3 +511,366 @@ def test_build_from_source_returns_dataclass(tmp_context: WorkContext) -> None: assert result.sdist_root_dir == mock_sdist_root assert result.build_env is not None assert result.source_type == SourceType.SDIST + + +# ============================================================================= +# Tests for all_versions mode (Issue #878) +# +# These tests verify the "all-versions" mode functionality which allows +# building multiple versions of packages instead of just the newest. 
+# ============================================================================= + + +def test_bootstrapper_all_versions_flag(tmp_context: WorkContext) -> None: + """Verify Bootstrapper accepts all_versions parameter. + + The all_versions flag should be stored and accessible on the + Bootstrapper instance. + """ + # Default is False + bt1 = bootstrapper.Bootstrapper(tmp_context) + assert bt1.all_versions is False + + # Can be explicitly set to True + bt2 = bootstrapper.Bootstrapper(tmp_context, all_versions=True) + assert bt2.all_versions is True + + +def test_version_exists_in_cache_no_cache_url(tmp_context: WorkContext) -> None: + """Verify _version_exists_in_cache returns False when no cache URL is set. + + When there's no cache wheel server URL configured, the function should + return False immediately without attempting any network operations. + """ + bt = bootstrapper.Bootstrapper(tmp_context, cache_wheel_server_url=None) + # Override to ensure no cache URL + bt.cache_wheel_server_url = "" + + result = bt._version_exists_in_cache( + req=Requirement("test-package"), + version=Version("1.0.0"), + ) + + assert result is False + + +def test_version_exists_in_cache_not_found(tmp_context: WorkContext) -> None: + """Verify _version_exists_in_cache returns False when version not in cache. + + When the cache server doesn't have the requested version, the function + should catch the resolution exception and return False. + """ + bt = bootstrapper.Bootstrapper( + tmp_context, cache_wheel_server_url="http://cache.example.com/simple" + ) + + # Mock the resolver to raise an exception (version not found) + with patch("fromager.resolver.resolve", side_effect=Exception("Not found")): + result = bt._version_exists_in_cache( + req=Requirement("test-package"), + version=Version("1.0.0"), + ) + + assert result is False + + +def test_version_exists_in_cache_found(tmp_context: WorkContext) -> None: + """Verify _version_exists_in_cache returns True when version is in cache. 
+    When the cache server successfully resolves the requested version,
+    the function should return True.
+ """ + from fromager import resolver + from fromager.candidate import Candidate + + bt = bootstrapper.Bootstrapper(tmp_context, all_versions=True) + + # Create mock candidates + mock_candidates = [ + Candidate( + name="test-package", + version=Version("2.0.0"), + url="http://pypi.org/simple/test-package/test-package-2.0.0.tar.gz", + is_sdist=True, + ), + Candidate( + name="test-package", + version=Version("1.5.0"), + url="http://pypi.org/simple/test-package/test-package-1.5.0.tar.gz", + is_sdist=True, + ), + Candidate( + name="test-package", + version=Version("1.0.0"), + url="http://pypi.org/simple/test-package/test-package-1.0.0.tar.gz", + is_sdist=True, + ), + ] + + with ( + patch.object(resolver, "resolve_all_versions", return_value=mock_candidates), + patch.object(bt, "_version_exists_in_cache", return_value=False), + ): + results = bt.resolve_and_add_top_level_all_versions( + Requirement("test-package>=1.0") + ) + + # Should return all 3 versions + assert len(results) == 3 + versions = [v for _, v in results] + assert Version("2.0.0") in versions + assert Version("1.5.0") in versions + assert Version("1.0.0") in versions + + +def test_resolve_and_add_top_level_all_versions_filters_cached( + tmp_context: WorkContext, +) -> None: + """Verify resolve_and_add_top_level_all_versions filters out cached versions. + + Versions that already exist in the cache server should be excluded from + the returned list to avoid redundant builds. 
+    In all-versions mode, the version is already known from pre-resolution,
+    so bootstrap() should resolve with the requirement pinned to that exact version.
+ """ + bt = bootstrapper.Bootstrapper(tmp_context, all_versions=True) + + # Mark a version as seen so bootstrap exits early (avoiding full build) + version = Version("1.5.0") + bt._mark_as_seen(Requirement("test-package"), version, sdist_only=False) + + # Mock resolve_version to track if it's called with the pinned requirement + resolve_calls: list[Requirement] = [] + + def mock_resolve( + req: Requirement, req_type: RequirementType + ) -> tuple[str, Version]: + resolve_calls.append(req) + return ("http://example.com/url", version) + + with patch.object(bt, "resolve_version", side_effect=mock_resolve): + # Call bootstrap with a pre-resolved version + bt.bootstrap( + Requirement("test-package>=1.0"), + RequirementType.TOP_LEVEL, + resolved_version=version, + ) + + # Verify resolve_version was called with a pinned requirement + assert len(resolve_calls) == 1 + assert "==1.5.0" in str(resolve_calls[0]) + + +def test_bootstrap_all_dependency_versions(tmp_context: WorkContext) -> None: + """Verify _bootstrap_all_dependency_versions resolves all versions of dependencies. + + In all-versions mode, the bootstrapper should resolve and build ALL matching + versions of each install dependency, not just the newest version. 
+ """ + from fromager import resolver + from fromager.candidate import Candidate + + bt = bootstrapper.Bootstrapper(tmp_context, all_versions=True) + + # Set up the why stack to simulate being inside a package bootstrap + bt.why = [(RequirementType.TOP_LEVEL, Requirement("parent-pkg"), Version("1.0.0"))] + + # Add the parent node to the dependency graph (required before adding children) + tmp_context.dependency_graph.add_dependency( + parent_name=None, + parent_version=None, + req_type=RequirementType.TOP_LEVEL, + req=Requirement("parent-pkg"), + req_version=Version("1.0.0"), + ) + + # Create mock candidates for the dependency + mock_candidates = [ + Candidate( + name="dep-package", + version=Version("2.0.0"), + url="http://pypi.org/simple/dep-package/dep-package-2.0.0.tar.gz", + is_sdist=True, + ), + Candidate( + name="dep-package", + version=Version("1.5.0"), + url="http://pypi.org/simple/dep-package/dep-package-1.5.0.tar.gz", + is_sdist=True, + ), + ] + + # Track which versions get bootstrapped + bootstrapped_versions: list[Version] = [] + + def mock_bootstrap( + req: Requirement, + req_type: RequirementType, + resolved_version: Version | None = None, + ) -> None: + if resolved_version: + bootstrapped_versions.append(resolved_version) + + with ( + patch.object(resolver, "resolve_all_versions", return_value=mock_candidates), + patch.object(bt, "_version_exists_in_cache", return_value=False), + patch.object(bt, "bootstrap", side_effect=mock_bootstrap), + ): + install_deps = [Requirement("dep-package>=1.0")] + bt._bootstrap_all_dependency_versions(install_deps) + + # Both versions should be bootstrapped + assert len(bootstrapped_versions) == 2 + assert Version("2.0.0") in bootstrapped_versions + assert Version("1.5.0") in bootstrapped_versions + + +def test_bootstrap_all_dependency_versions_filters_cached( + tmp_context: WorkContext, +) -> None: + """Verify _bootstrap_all_dependency_versions filters out cached versions. 
+ + Versions of dependencies that already exist in the cache server should + be skipped to avoid redundant builds. + """ + from fromager import resolver + from fromager.candidate import Candidate + + bt = bootstrapper.Bootstrapper( + tmp_context, + all_versions=True, + cache_wheel_server_url="http://cache.example.com/simple", + ) + + # Set up the why stack + bt.why = [(RequirementType.TOP_LEVEL, Requirement("parent-pkg"), Version("1.0.0"))] + + # Add the parent node to the dependency graph (required before adding children) + tmp_context.dependency_graph.add_dependency( + parent_name=None, + parent_version=None, + req_type=RequirementType.TOP_LEVEL, + req=Requirement("parent-pkg"), + req_version=Version("1.0.0"), + ) + + # Create mock candidates + mock_candidates = [ + Candidate( + name="dep-package", + version=Version("2.0.0"), + url="http://pypi.org/simple/dep-package/dep-package-2.0.0.tar.gz", + is_sdist=True, + ), + Candidate( + name="dep-package", + version=Version("1.0.0"), + url="http://pypi.org/simple/dep-package/dep-package-1.0.0.tar.gz", + is_sdist=True, + ), + ] + + def mock_cache_check(req: Requirement, version: Version) -> bool: + # Version 1.0.0 is cached, 2.0.0 is not + return version == Version("1.0.0") + + bootstrapped_versions: list[Version] = [] + + def mock_bootstrap( + req: Requirement, + req_type: RequirementType, + resolved_version: Version | None = None, + ) -> None: + if resolved_version: + bootstrapped_versions.append(resolved_version) + + with ( + patch.object(resolver, "resolve_all_versions", return_value=mock_candidates), + patch.object(bt, "_version_exists_in_cache", side_effect=mock_cache_check), + patch.object(bt, "bootstrap", side_effect=mock_bootstrap), + ): + install_deps = [Requirement("dep-package>=1.0")] + bt._bootstrap_all_dependency_versions(install_deps) + + # Only version 2.0.0 should be bootstrapped (1.0.0 is cached) + assert len(bootstrapped_versions) == 1 + assert bootstrapped_versions[0] == Version("2.0.0") diff --git 
a/tests/test_resolver.py b/tests/test_resolver.py index cc7c5593..863c1cfa 100644 --- a/tests/test_resolver.py +++ b/tests/test_resolver.py @@ -1109,3 +1109,172 @@ def custom_resolver_provider( assert "pypi.org" not in error_message.lower(), ( f"Error message incorrectly mentions PyPI when using GitHub resolver: {error_message}" ) + + +# ============================================================================= +# Tests for resolve_all_versions() and get_all_matching_candidates() +# +# These tests verify the "all-versions" mode functionality added for Issue #878 +# which allows building multiple versions of packages instead of just the newest. +# ============================================================================= + + +def test_get_all_matching_candidates_returns_all_versions() -> None: + """Verify get_all_matching_candidates returns all matching versions. + + When building multiple versions of a package, we need ALL versions that match + the requirement specifier, not just the newest one. This test ensures the + function correctly returns all candidates that satisfy the requirement. + """ + with requests_mock.Mocker() as r: + r.get( + "https://pypi.org/simple/numpy/", + text=_numpy_simple_response, + ) + + provider = resolver.PyPIProvider(include_sdists=False) + candidates = resolver.get_all_matching_candidates( + provider, Requirement("numpy>=1.24,<2.1") + ) + + # Should get all 3 versions that match >=1.24,<2.1 + # (excludes 2.2.0 which is >= 2.1) + versions = [c.version for c in candidates] + assert Version("1.24.0") in versions + assert Version("1.26.4") in versions + assert Version("2.0.0") in versions + assert Version("2.2.0") not in versions + + # Verify sorted by version descending (newest first) + assert versions == sorted(versions, reverse=True) + + +def test_get_all_matching_candidates_with_constraint() -> None: + """Verify constraints are honored when getting all matching candidates. 
+ + When using --all-versions mode, constraints should still filter out versions + that don't match the project's constraints file. + """ + constraint = constraints.Constraints() + constraint.add_constraint("hydra-core>=1.3,<1.4") + + with requests_mock.Mocker() as r: + r.get( + "https://pypi.org/simple/hydra-core/", + text=_hydra_core_simple_response, + ) + + provider = resolver.PyPIProvider(include_sdists=True, constraints=constraint) + candidates = resolver.get_all_matching_candidates( + provider, Requirement("hydra-core") + ) + + # With constraint >=1.3,<1.4, only 1.3.x versions should match + versions = [str(c.version) for c in candidates] + assert "1.3.2" in versions + assert "1.2.2" not in versions # Excluded by >=1.3 constraint + + +def test_get_all_matching_candidates_empty_result() -> None: + """Verify empty result when no candidates match the requirement. + + When constraints or specifiers exclude all versions, the function should + return an empty list rather than raising an exception. + """ + constraint = constraints.Constraints() + constraint.add_constraint("hydra-core>=99.0") + + with requests_mock.Mocker() as r: + r.get( + "https://pypi.org/simple/hydra-core/", + text=_hydra_core_simple_response, + ) + + provider = resolver.PyPIProvider(include_sdists=True, constraints=constraint) + candidates = resolver.get_all_matching_candidates( + provider, Requirement("hydra-core") + ) + + # No versions match >=99.0, should return empty list + assert candidates == [] + + +def test_get_all_matching_candidates_sdist_only() -> None: + """Verify only sdist candidates are returned when wheels are excluded. + + In --all-versions mode for building from source, we typically want only + sdists so we can build wheels ourselves. 
+ """ + with requests_mock.Mocker() as r: + r.get( + "https://pypi.org/simple/hydra-core/", + text=_hydra_core_simple_response, + ) + + # include_sdists=True, include_wheels=False to get only sdists + provider = resolver.PyPIProvider(include_sdists=True, include_wheels=False) + candidates = resolver.get_all_matching_candidates( + provider, Requirement("hydra-core") + ) + + # Should only return sdist candidates + for candidate in candidates: + assert candidate.is_sdist is True + + # Verify we get the expected versions (only sdists exist for 1.2.2 and 1.3.2) + versions = [str(c.version) for c in candidates] + assert "1.2.2" in versions + assert "1.3.2" in versions + + +def test_get_all_matching_candidates_github_provider() -> None: + """Verify get_all_matching_candidates works with GitHubTagProvider. + + The all-versions mode should work with custom providers, not just PyPI. + """ + with requests_mock.Mocker() as r: + r.get( + "https://api.github.com:443/repos/python-wheel-build/fromager/tags", + text=_github_fromager_tag_response, + ) + + provider = resolver.GitHubTagProvider( + organization="python-wheel-build", repo="fromager" + ) + candidates = resolver.get_all_matching_candidates( + provider, Requirement("fromager>=0.5,<0.9") + ) + + # Should get versions 0.5.0 through 0.8.1 (5 versions) + versions = [str(c.version) for c in candidates] + assert "0.5.0" in versions + assert "0.6.0" in versions + assert "0.7.0" in versions + assert "0.8.0" in versions + assert "0.8.1" in versions + assert "0.9.0" not in versions # Excluded by <0.9 + assert "0.4.0" not in versions # Excluded by >=0.5 + + +def test_get_all_matching_candidates_respects_order() -> None: + """Verify candidates are sorted by version descending (newest first). + + This ordering is important for the --all-versions mode because it allows + users to see the most recent versions first and ensures consistent behavior. 
+ """ + with requests_mock.Mocker() as r: + r.get( + "https://pypi.org/simple/numpy/", + text=_numpy_simple_response, + ) + + provider = resolver.PyPIProvider(include_sdists=False) + candidates = resolver.get_all_matching_candidates( + provider, Requirement("numpy") + ) + + versions = [c.version for c in candidates] + # Should be in descending order (newest first) + assert versions == sorted(versions, reverse=True) + assert versions[0] == Version("2.2.0") # Newest first + assert versions[-1] == Version("1.24.0") # Oldest last