From e9d252779a8823628e649d14164d9638f38a2c43 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 21 Jan 2026 01:42:02 +0000 Subject: [PATCH 1/3] Initial plan From 9f01a4dafa95df1c1a6d6accb7700af80899410c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 21 Jan 2026 01:50:36 +0000 Subject: [PATCH 2/3] feat: Add LocatorCache abstraction and refactor pet-conda and pet-linux-global-python Co-authored-by: karthiknadig <3840081+karthiknadig@users.noreply.github.com> --- crates/pet-conda/src/lib.rs | 111 ++++--------- crates/pet-core/src/cache.rs | 182 ++++++++++++++++++++++ crates/pet-core/src/lib.rs | 1 + crates/pet-linux-global-python/src/lib.rs | 24 ++- 4 files changed, 226 insertions(+), 92 deletions(-) create mode 100644 crates/pet-core/src/cache.rs diff --git a/crates/pet-conda/src/lib.rs b/crates/pet-conda/src/lib.rs index 0f4ebaf6..bec4eaa1 100644 --- a/crates/pet-conda/src/lib.rs +++ b/crates/pet-conda/src/lib.rs @@ -11,6 +11,7 @@ use environments::{get_conda_environment_info, CondaEnvironment}; use log::error; use manager::CondaManager; use pet_core::{ + cache::LocatorCache, env::PythonEnv, os_environment::Environment, python_environment::{PythonEnvironment, PythonEnvironmentKind}, @@ -20,7 +21,6 @@ use pet_core::{ use pet_fs::path::norm_case; use serde::{Deserialize, Serialize}; use std::{ - collections::HashMap, path::{Path, PathBuf}, sync::{Arc, RwLock}, thread, @@ -61,8 +61,8 @@ pub struct CondaTelemetryInfo { } pub struct Conda { - pub environments: Arc>>, - pub managers: Arc>>, + pub environments: Arc>, + pub managers: Arc>, pub env_vars: EnvVariables, conda_executable: Arc>>, } @@ -70,15 +70,15 @@ pub struct Conda { impl Conda { pub fn from(env: &dyn Environment) -> Conda { Conda { - environments: Arc::new(RwLock::new(HashMap::new())), - managers: Arc::new(RwLock::new(HashMap::new())), + environments: Arc::new(LocatorCache::new()), + managers: Arc::new(LocatorCache::new()), env_vars: EnvVariables::from(env), conda_executable: Arc::new(RwLock::new(None)), } } fn clear(&self) { - self.environments.write().unwrap().clear(); - self.managers.write().unwrap().clear(); + self.environments.clear(); + self.managers.clear(); } } @@ -91,17 +91,17 @@ impl CondaLocator for Conda { // Look for environments that we couldn't find without spawning conda. let user_provided_conda_exe = conda_executable.is_some(); let conda_info = CondaInfo::from(conda_executable)?; - let environments = self.environments.read().unwrap().clone(); + let environments_map = self.environments.clone_map(); let new_envs = conda_info .envs .clone() .into_iter() - .filter(|p| !environments.contains_key(p)) + .filter(|p| !environments_map.contains_key(p)) .collect::>(); if new_envs.is_empty() { return None; } - let environments = environments + let environments = environments_map .into_values() .collect::>(); @@ -119,10 +119,7 @@ impl CondaLocator for Conda { fn get_info_for_telemetry(&self, conda_executable: Option) -> CondaTelemetryInfo { let can_spawn_conda = CondaInfo::from(conda_executable).is_some(); - let environments = self.environments.read().unwrap().clone(); - let environments = environments - .into_values() - .collect::>(); + let environments = self.environments.values(); let (conda_rcs, env_dirs) = get_conda_rcs_and_env_dirs(&self.env_vars, &environments); let mut environments_txt = None; let mut environments_txt_exists = None; @@ -159,19 +156,14 @@ impl CondaLocator for Conda { if let Some(conda_dir) = manager.conda_dir.clone() { // Keep track to search again later. // Possible we'll find environments in other directories created using this manager - let mut managers = self.managers.write().unwrap(); - // Keep track to search again later. - // Possible we'll find environments in other directories created using this manager - managers.insert(conda_dir.clone(), manager.clone()); - drop(managers); + self.managers.insert(conda_dir.clone(), manager.clone()); // Find all the environments in the conda install folder. (under `envs` folder) for conda_env in get_conda_environments(&get_environments(&conda_dir), &manager.clone().into()) { // If reported earlier, no point processing this again. - let mut environments = self.environments.write().unwrap(); - if environments.contains_key(&conda_env.prefix) { + if self.environments.contains_key(&conda_env.prefix) { continue; } @@ -183,7 +175,8 @@ impl CondaLocator for Conda { .and_then(|p| CondaManager::from(&p)) .unwrap_or(manager.clone()); let env = conda_env.to_python_environment(Some(manager.to_manager())); - environments.insert(conda_env.prefix.clone(), env.clone()); + self.environments + .insert(conda_env.prefix.clone(), env.clone()); reporter.report_manager(&manager.to_manager()); reporter.report_environment(&env); } @@ -194,22 +187,8 @@ impl CondaLocator for Conda { impl Conda { fn get_manager(&self, conda_dir: &Path) -> Option { - // First try to read from cache - { - let managers = self.managers.read().unwrap(); - if let Some(mgr) = managers.get(conda_dir) { - return Some(mgr.clone()); - } - } - - // If not found, acquire write lock and insert - if let Some(manager) = CondaManager::from(conda_dir) { - let mut managers = self.managers.write().unwrap(); - managers.insert(conda_dir.into(), manager.clone()); - Some(manager) - } else { - None - } + self.managers + .get_or_insert_with(conda_dir.to_path_buf(), || CondaManager::from(conda_dir)) } } @@ -250,20 +229,17 @@ impl Locator for Conda { return None; } - // First check cache with read lock - { - let environments = self.environments.read().unwrap(); - if let Some(env) = environments.get(path) { - return Some(env.clone()); - } + // Check cache first + if let Some(cached_env) = self.environments.get(path) { + return Some(cached_env); } - // Not in cache, build the environment and insert with write lock + + // Not in cache, build the environment and insert if let Some(env) = get_conda_environment_info(path, &None) { if let Some(conda_dir) = &env.conda_dir { if let Some(manager) = self.get_manager(conda_dir) { let env = env.to_python_environment(Some(manager.to_manager())); - let mut environments = self.environments.write().unwrap(); - environments.insert(path.clone(), env.clone()); + self.environments.insert(path.clone(), env.clone()); return Some(env); } else { // We will still return the conda env even though we do not have the manager. @@ -271,8 +247,7 @@ impl Locator for Conda { // The client can activate this env either using another conda manager or using the activation scripts error!("Unable to find Conda Manager for env (even though we have a conda_dir): {:?}", env); let env = env.to_python_environment(None); - let mut environments = self.environments.write().unwrap(); - environments.insert(path.clone(), env.clone()); + self.environments.insert(path.clone(), env.clone()); return Some(env); } } else { @@ -281,8 +256,7 @@ impl Locator for Conda { // The client can activate this env either using another conda manager or using the activation scripts error!("Unable to find Conda Manager for env: {:?}", env); let env = env.to_python_environment(None); - let mut environments = self.environments.write().unwrap(); - environments.insert(path.clone(), env.clone()); + self.environments.insert(path.clone(), env.clone()); return Some(env); } } @@ -315,8 +289,7 @@ impl Locator for Conda { error!("Unable to find Conda Manager for the Conda env: {:?}", env); let prefix = env.prefix.clone(); let env = env.to_python_environment(None); - let mut environments = self.environments.write().unwrap(); - environments.insert(prefix, env.clone()); + self.environments.insert(prefix, env.clone()); reporter.report_environment(&env); return None; } @@ -325,38 +298,23 @@ impl Locator for Conda { // We will try to get the manager for this conda_dir let prefix = env.clone().prefix.clone(); - { - // 3.1 Check if we have already reported this environment. - // Closure to quickly release lock - let environments = self.environments.read().unwrap(); - if environments.contains_key(&env.prefix) { - return None; - } + // 3.1 Check if we have already reported this environment. + if self.environments.contains_key(&env.prefix) { + return None; } - // 4 Get the manager for this env. let conda_dir = &env.conda_dir.clone()?; - let managers = self.managers.read().unwrap(); - let mut manager = managers.get(conda_dir).cloned(); - drop(managers); - - if manager.is_none() { - // 4.1 Build the manager from the conda dir if we do not have it. - if let Some(conda_manager) = CondaManager::from(conda_dir) { - let mut managers = self.managers.write().unwrap(); - managers.insert(conda_dir.to_path_buf().clone(), conda_manager.clone()); - manager = Some(conda_manager); - } - } + let manager = self.managers.get_or_insert_with(conda_dir.clone(), || { + CondaManager::from(conda_dir) + }); // 5. Report this env. if let Some(manager) = manager { let env = env.to_python_environment( Some(manager.to_manager()), ); - let mut environments = self.environments.write().unwrap(); - environments.insert(prefix.clone(), env.clone()); + self.environments.insert(prefix.clone(), env.clone()); reporter.report_manager(&manager.to_manager()); reporter.report_environment(&env); } else { @@ -365,8 +323,7 @@ impl Locator for Conda { // The client can activate this env either using another conda manager or using the activation scripts error!("Unable to find Conda Manager for Conda env (even though we have a conda_dir {:?}): Env Details = {:?}", conda_dir, env); let env = env.to_python_environment(None); - let mut environments = self.environments.write().unwrap(); - environments.insert(prefix.clone(), env.clone()); + self.environments.insert(prefix.clone(), env.clone()); reporter.report_environment(&env); } Option::<()>::Some(()) diff --git a/crates/pet-core/src/cache.rs b/crates/pet-core/src/cache.rs new file mode 100644 index 00000000..15759e4f --- /dev/null +++ b/crates/pet-core/src/cache.rs @@ -0,0 +1,182 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Generic caching abstraction for locators. +//! +//! Provides a thread-safe cache wrapper that consolidates common caching patterns +//! used across multiple locators in the codebase. + +use std::{collections::HashMap, hash::Hash, path::PathBuf, sync::RwLock}; + +use crate::{manager::EnvManager, python_environment::PythonEnvironment}; + +/// A thread-safe cache that stores key-value pairs using RwLock for concurrent access. +/// +/// This cache uses read-write locks to allow multiple concurrent readers while +/// ensuring exclusive access for writers. Values must implement Clone to be +/// returned from the cache. +pub struct LocatorCache { + cache: RwLock>, +} + +impl LocatorCache { + /// Creates a new empty cache. + pub fn new() -> Self { + Self { + cache: RwLock::new(HashMap::new()), + } + } + + /// Returns a cloned value for the given key if it exists in the cache. + pub fn get(&self, key: &K) -> Option { + self.cache.read().unwrap().get(key).cloned() + } + + /// Checks if the cache contains the given key. + pub fn contains_key(&self, key: &K) -> bool { + self.cache.read().unwrap().contains_key(key) + } + + /// Inserts a key-value pair into the cache. + /// + /// Returns the previous value if the key was already present. + pub fn insert(&self, key: K, value: V) -> Option { + self.cache.write().unwrap().insert(key, value) + } + + /// Returns a cloned value for the given key if it exists, otherwise computes + /// and inserts the value using the provided closure. + /// + /// This method first checks with a read lock, then upgrades to a write lock + /// if the value needs to be computed and inserted. + pub fn get_or_insert_with(&self, key: K, f: F) -> Option + where + F: FnOnce() -> Option, + K: Clone, + { + // First check with read lock + { + let cache = self.cache.read().unwrap(); + if let Some(value) = cache.get(&key) { + return Some(value.clone()); + } + } + + // Compute the value (outside of any lock) + if let Some(value) = f() { + // Acquire write lock and insert + let mut cache = self.cache.write().unwrap(); + // Double-check in case another thread inserted while we were computing + if let Some(existing) = cache.get(&key) { + return Some(existing.clone()); + } + cache.insert(key, value.clone()); + Some(value) + } else { + None + } + } + + /// Clears all entries from the cache. + pub fn clear(&self) { + self.cache.write().unwrap().clear(); + } + + /// Returns all values in the cache as a vector. + pub fn values(&self) -> Vec { + self.cache.read().unwrap().values().cloned().collect() + } + + /// Returns the number of entries in the cache. + pub fn len(&self) -> usize { + self.cache.read().unwrap().len() + } + + /// Returns true if the cache is empty. + pub fn is_empty(&self) -> bool { + self.cache.read().unwrap().is_empty() + } + + /// Returns all entries in the cache as a HashMap. + pub fn clone_map(&self) -> HashMap + where + K: Clone, + { + self.cache.read().unwrap().clone() + } +} + +impl Default for LocatorCache { + fn default() -> Self { + Self::new() + } +} + +/// Type alias for caching Python environments by their path. +pub type EnvironmentCache = LocatorCache; + +/// Type alias for caching environment managers by their path. +pub type ManagerCache = LocatorCache; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_cache_get_and_insert() { + let cache: LocatorCache = LocatorCache::new(); + + assert!(cache.get(&"key1".to_string()).is_none()); + assert!(!cache.contains_key(&"key1".to_string())); + + cache.insert("key1".to_string(), 42); + + assert_eq!(cache.get(&"key1".to_string()), Some(42)); + assert!(cache.contains_key(&"key1".to_string())); + } + + #[test] + fn test_cache_get_or_insert_with() { + let cache: LocatorCache = LocatorCache::new(); + + // First call should compute and insert + let result = cache.get_or_insert_with("key1".to_string(), || Some(42)); + assert_eq!(result, Some(42)); + + // Second call should return cached value + let result = cache.get_or_insert_with("key1".to_string(), || Some(100)); + assert_eq!(result, Some(42)); + + // Test with None return + let result = cache.get_or_insert_with("key2".to_string(), || None); + assert!(result.is_none()); + assert!(!cache.contains_key(&"key2".to_string())); + } + + #[test] + fn test_cache_clear() { + let cache: LocatorCache = LocatorCache::new(); + + cache.insert("key1".to_string(), 42); + cache.insert("key2".to_string(), 100); + + assert_eq!(cache.len(), 2); + + cache.clear(); + + assert!(cache.is_empty()); + assert_eq!(cache.len(), 0); + } + + #[test] + fn test_cache_values() { + let cache: LocatorCache = LocatorCache::new(); + + cache.insert("key1".to_string(), 42); + cache.insert("key2".to_string(), 100); + + let mut values = cache.values(); + values.sort(); + assert_eq!(values, vec![42, 100]); + } +} diff --git a/crates/pet-core/src/lib.rs b/crates/pet-core/src/lib.rs index 1768cae6..0544f2bd 100644 --- a/crates/pet-core/src/lib.rs +++ b/crates/pet-core/src/lib.rs @@ -9,6 +9,7 @@ use python_environment::{PythonEnvironment, PythonEnvironmentKind}; use reporter::Reporter; pub mod arch; +pub mod cache; pub mod env; pub mod manager; pub mod os_environment; diff --git a/crates/pet-linux-global-python/src/lib.rs b/crates/pet-linux-global-python/src/lib.rs index c8c94ec9..56c1feb1 100644 --- a/crates/pet-linux-global-python/src/lib.rs +++ b/crates/pet-linux-global-python/src/lib.rs @@ -2,15 +2,16 @@ // Licensed under the MIT License. use std::{ - collections::{HashMap, HashSet}, + collections::HashSet, fs, path::{Path, PathBuf}, - sync::{Arc, RwLock}, + sync::Arc, thread, }; use pet_core::{ arch::Architecture, + cache::LocatorCache, env::PythonEnv, python_environment::{PythonEnvironment, PythonEnvironmentBuilder, PythonEnvironmentKind}, reporter::Reporter, @@ -21,15 +22,13 @@ use pet_python_utils::{env::ResolvedPythonEnv, executable::find_executables}; use pet_virtualenv::is_virtualenv; pub struct LinuxGlobalPython { - reported_executables: Arc>>, + reported_executables: Arc>, } impl LinuxGlobalPython { pub fn new() -> LinuxGlobalPython { LinuxGlobalPython { - reported_executables: Arc::new(RwLock::new( - HashMap::::new(), - )), + reported_executables: Arc::new(LocatorCache::new()), } } @@ -93,18 +92,14 @@ impl Locator for LinuxGlobalPython { return None; } - self.reported_executables - .read() - .unwrap() - .get(&executable) - .cloned() + self.reported_executables.get(&executable) } fn find(&self, reporter: &dyn Reporter) { if std::env::consts::OS == "macos" || std::env::consts::OS == "windows" { return; } - self.reported_executables.write().unwrap().clear(); + self.reported_executables.clear(); self.find_cached(Some(reporter)) } } @@ -112,18 +107,17 @@ impl Locator for LinuxGlobalPython { fn find_and_report_global_pythons_in( bin: &Path, reporter: Option<&dyn Reporter>, - reported_executables: &Arc>>, + reported_executables: &Arc>, ) { let python_executables = find_executables(bin); for exe in python_executables.clone().iter() { - if reported_executables.read().unwrap().contains_key(exe) { + if reported_executables.contains_key(exe) { continue; } if let Some(resolved) = ResolvedPythonEnv::from(exe) { if let Some(env) = get_python_in_bin(&resolved.to_python_env(), resolved.is64_bit) { resolved.add_to_cache(env.clone()); - let mut reported_executables = reported_executables.write().unwrap(); // env.symlinks = Some([symlinks, env.symlinks.clone().unwrap_or_default()].concat()); if let Some(symlinks) = &env.symlinks { for symlink in symlinks { From c8f43b04812ee96262132a0e3d55fe7667673fed Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 21 Jan 2026 02:21:40 +0000 Subject: [PATCH 3/3] Address PR review: add insert_many for atomic batch inserts, add #[must_use] to get_or_insert_with Co-authored-by: karthiknadig <3840081+karthiknadig@users.noreply.github.com> --- crates/pet-core/src/cache.rs | 30 +++++++++++++++++++++++ crates/pet-linux-global-python/src/lib.rs | 10 +++++--- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/crates/pet-core/src/cache.rs b/crates/pet-core/src/cache.rs index 15759e4f..361a3bbd 100644 --- a/crates/pet-core/src/cache.rs +++ b/crates/pet-core/src/cache.rs @@ -44,11 +44,23 @@ impl LocatorCache { self.cache.write().unwrap().insert(key, value) } + /// Inserts multiple key-value pairs into the cache atomically. + /// + /// This method acquires a single write lock for all insertions, which is more + /// efficient than calling `insert` multiple times when inserting many entries. + pub fn insert_many(&self, entries: impl IntoIterator) { + let mut cache = self.cache.write().unwrap(); + for (key, value) in entries { + cache.insert(key, value); + } + } + /// Returns a cloned value for the given key if it exists, otherwise computes /// and inserts the value using the provided closure. /// /// This method first checks with a read lock, then upgrades to a write lock /// if the value needs to be computed and inserted. + #[must_use] pub fn get_or_insert_with(&self, key: K, f: F) -> Option where F: FnOnce() -> Option, @@ -179,4 +191,22 @@ mod tests { values.sort(); assert_eq!(values, vec![42, 100]); } + + #[test] + fn test_cache_insert_many() { + let cache: LocatorCache = LocatorCache::new(); + + let entries = vec![ + ("key1".to_string(), 42), + ("key2".to_string(), 100), + ("key3".to_string(), 200), + ]; + + cache.insert_many(entries); + + assert_eq!(cache.len(), 3); + assert_eq!(cache.get(&"key1".to_string()), Some(42)); + assert_eq!(cache.get(&"key2".to_string()), Some(100)); + assert_eq!(cache.get(&"key3".to_string()), Some(200)); + } } diff --git a/crates/pet-linux-global-python/src/lib.rs b/crates/pet-linux-global-python/src/lib.rs index 56c1feb1..1b0297e2 100644 --- a/crates/pet-linux-global-python/src/lib.rs +++ b/crates/pet-linux-global-python/src/lib.rs @@ -118,15 +118,19 @@ fn find_and_report_global_pythons_in( if let Some(resolved) = ResolvedPythonEnv::from(exe) { if let Some(env) = get_python_in_bin(&resolved.to_python_env(), resolved.is64_bit) { resolved.add_to_cache(env.clone()); - // env.symlinks = Some([symlinks, env.symlinks.clone().unwrap_or_default()].concat()); + + // Collect all entries to insert atomically + let mut entries = Vec::new(); if let Some(symlinks) = &env.symlinks { for symlink in symlinks { - reported_executables.insert(symlink.clone(), env.clone()); + entries.push((symlink.clone(), env.clone())); } } if let Some(exe) = env.executable.clone() { - reported_executables.insert(exe, env.clone()); + entries.push((exe, env.clone())); } + reported_executables.insert_many(entries); + if let Some(reporter) = reporter { reporter.report_environment(&env); }