From 8981f283d03367e61214e53ee9d6bb691dc9f54b Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Wed, 28 Jan 2026 16:51:31 -0800 Subject: [PATCH 1/2] Add glob support for directory and file pattern matching --- Cargo.lock | 7 + .../env_python_3/conda-meta/history | 4 +- crates/pet-fs/Cargo.toml | 1 + crates/pet-fs/src/glob.rs | 236 ++++++++++++++++++ crates/pet-fs/src/lib.rs | 1 + crates/pet/src/jsonrpc.rs | 32 ++- docs/JSONRPC.md | 15 ++ 7 files changed, 290 insertions(+), 6 deletions(-) create mode 100644 crates/pet-fs/src/glob.rs diff --git a/Cargo.lock b/Cargo.lock index c91c6a03..740830a9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -289,6 +289,12 @@ dependencies = [ "wasip2", ] +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + [[package]] name = "hashbrown" version = "0.14.5" @@ -501,6 +507,7 @@ dependencies = [ name = "pet-fs" version = "0.1.0" dependencies = [ + "glob", "log", "msvc_spectre_libs", "windows-sys 0.59.0", diff --git a/crates/pet-conda/tests/unix/conda_env_without_manager_but_found_in_history/env_python_3/conda-meta/history b/crates/pet-conda/tests/unix/conda_env_without_manager_but_found_in_history/env_python_3/conda-meta/history index fc09724d..0fd95448 100644 --- a/crates/pet-conda/tests/unix/conda_env_without_manager_but_found_in_history/env_python_3/conda-meta/history +++ b/crates/pet-conda/tests/unix/conda_env_without_manager_but_found_in_history/env_python_3/conda-meta/history @@ -1,8 +1,8 @@ ==> 2024-02-28 23:05:07 <== -# cmd: /home/runner/work/python-environment-tools/python-environment-tools/crates/pet-conda/tests/unix/conda_env_without_manager_but_found_in_history/some_other_location/conda_install/bin/conda create -n conda1 +# cmd: /home/kanadig/GIT/projects/python-environment-tools/crates/pet-conda/tests/unix/conda_env_without_manager_but_found_in_history/some_other_location/conda_install/bin/conda create -n conda1 # conda version: 23.11.0 ==> 2024-02-28 23:08:59 <== -# cmd: /home/runner/work/python-environment-tools/python-environment-tools/crates/pet-conda/tests/unix/conda_env_without_manager_but_found_in_history/some_other_location/conda_install/bin/conda install -c conda-forge --name conda1 ipykernel -y +# cmd: /home/kanadig/GIT/projects/python-environment-tools/crates/pet-conda/tests/unix/conda_env_without_manager_but_found_in_history/some_other_location/conda_install/bin/conda install -c conda-forge --name conda1 ipykernel -y # conda version: 23.11.0 +conda-forge/noarch::appnope-0.1.4-pyhd8ed1ab_0 +conda-forge/noarch::asttokens-2.4.1-pyhd8ed1ab_0 diff --git a/crates/pet-fs/Cargo.toml b/crates/pet-fs/Cargo.toml index 6fdba301..8663dbba 100644 --- a/crates/pet-fs/Cargo.toml +++ b/crates/pet-fs/Cargo.toml @@ -9,4 +9,5 @@ msvc_spectre_libs = { version = "0.1.1", features = ["error"] } windows-sys = { version = "0.59", features = ["Win32_Storage_FileSystem", "Win32_Foundation"] } [dependencies] +glob = "0.3.1" log = "0.4.21" diff --git a/crates/pet-fs/src/glob.rs b/crates/pet-fs/src/glob.rs new file mode 100644 index 00000000..a6772eb4 --- /dev/null +++ b/crates/pet-fs/src/glob.rs @@ -0,0 +1,236 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use glob::glob; +use std::path::PathBuf; + +/// Characters that indicate a path contains glob pattern metacharacters. +const GLOB_METACHARACTERS: &[char] = &['*', '?', '[', ']']; + +/// Checks whether a path string contains glob metacharacters. +/// +/// # Examples +/// - `"/home/user/*"` → `true` +/// - `"/home/user/envs"` → `false` +/// - `"**/*.py"` → `true` +/// - `"/home/user/[abc]"` → `true` +pub fn is_glob_pattern(path: &str) -> bool { + path.contains(GLOB_METACHARACTERS) +} + +/// Expands a single glob pattern to matching paths. +/// +/// If the path does not contain glob metacharacters, returns it unchanged (if it exists) +/// or as-is (to let downstream code handle non-existent paths). +/// +/// If the path is a glob pattern, expands it and returns all matching paths. +/// Pattern errors and unreadable paths are logged and skipped. +/// +/// # Examples +/// - `"/home/user/envs"` → `["/home/user/envs"]` +/// - `"/home/user/*/venv"` → `["/home/user/project1/venv", "/home/user/project2/venv"]` +/// - `"**/.venv"` → All `.venv` directories recursively +pub fn expand_glob_pattern(pattern: &str) -> Vec { + if !is_glob_pattern(pattern) { + // Not a glob pattern, return as-is + return vec![PathBuf::from(pattern)]; + } + + match glob(pattern) { + Ok(paths) => { + let mut result = Vec::new(); + for entry in paths { + match entry { + Ok(path) => result.push(path), + Err(e) => { + log::debug!("Failed to read glob entry: {}", e); + } + } + } + if result.is_empty() { + log::debug!("Glob pattern '{}' matched no paths", pattern); + } + result + } + Err(e) => { + log::warn!("Invalid glob pattern '{}': {}", pattern, e); + Vec::new() + } + } +} + +/// Expands a list of paths, where each path may be a glob pattern. +/// +/// Non-glob paths are passed through as-is. +/// Glob patterns are expanded to all matching paths. +/// Duplicate paths are preserved (caller should deduplicate if needed). +/// +/// # Examples +/// ```ignore +/// let paths = vec![ +/// PathBuf::from("/home/user/project"), +/// PathBuf::from("/home/user/*/venv"), +/// ]; +/// let expanded = expand_glob_patterns(&paths); +/// // expanded contains "/home/user/project" plus all matching venv dirs +/// ``` +pub fn expand_glob_patterns(paths: &[PathBuf]) -> Vec { + let mut result = Vec::new(); + for path in paths { + let path_str = path.to_string_lossy(); + let expanded = expand_glob_pattern(&path_str); + result.extend(expanded); + } + result +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + + #[test] + fn test_is_glob_pattern_with_asterisk() { + assert!(is_glob_pattern("/home/user/*")); + assert!(is_glob_pattern("**/*.py")); + assert!(is_glob_pattern("*.txt")); + } + + #[test] + fn test_is_glob_pattern_with_question_mark() { + assert!(is_glob_pattern("/home/user/file?.txt")); + assert!(is_glob_pattern("test?")); + } + + #[test] + fn test_is_glob_pattern_with_brackets() { + assert!(is_glob_pattern("/home/user/[abc]")); + assert!(is_glob_pattern("file[0-9].txt")); + } + + #[test] + fn test_is_glob_pattern_no_metacharacters() { + assert!(!is_glob_pattern("/home/user/envs")); + assert!(!is_glob_pattern("simple_path")); + assert!(!is_glob_pattern("/usr/local/bin/python3")); + } + + #[test] + fn test_expand_non_glob_path() { + let path = "/some/literal/path"; + let result = expand_glob_pattern(path); + assert_eq!(result.len(), 1); + assert_eq!(result[0], PathBuf::from(path)); + } + + #[test] + fn test_expand_glob_pattern_no_matches() { + let pattern = "/this/path/definitely/does/not/exist/*"; + let result = expand_glob_pattern(pattern); + assert!(result.is_empty()); + } + + #[test] + fn test_expand_glob_pattern_with_matches() { + // Create temp directories for testing + let temp_dir = std::env::temp_dir().join("pet_glob_test"); + let _ = fs::remove_dir_all(&temp_dir); + fs::create_dir_all(temp_dir.join("project1")).unwrap(); + fs::create_dir_all(temp_dir.join("project2")).unwrap(); + fs::create_dir_all(temp_dir.join("other")).unwrap(); + + let pattern = format!("{}/project*", temp_dir.to_string_lossy()); + let result = expand_glob_pattern(&pattern); + + assert_eq!(result.len(), 2); + assert!(result.iter().any(|p| p.ends_with("project1"))); + assert!(result.iter().any(|p| p.ends_with("project2"))); + assert!(!result.iter().any(|p| p.ends_with("other"))); + + // Cleanup + let _ = fs::remove_dir_all(&temp_dir); + } + + #[test] + fn test_expand_glob_patterns_mixed() { + let temp_dir = std::env::temp_dir().join("pet_glob_test_mixed"); + let _ = fs::remove_dir_all(&temp_dir); + fs::create_dir_all(temp_dir.join("dir1")).unwrap(); + fs::create_dir_all(temp_dir.join("dir2")).unwrap(); + + let paths = vec![ + PathBuf::from("/literal/path"), + PathBuf::from(format!("{}/dir*", temp_dir.to_string_lossy())), + ]; + + let result = expand_glob_patterns(&paths); + + // Should have literal path + 2 expanded directories + assert_eq!(result.len(), 3); + assert!(result.contains(&PathBuf::from("/literal/path"))); + + // Cleanup + let _ = fs::remove_dir_all(&temp_dir); + } + + #[test] + fn test_expand_glob_pattern_recursive() { + // Create nested temp directories for testing ** + let temp_dir = std::env::temp_dir().join("pet_glob_test_recursive"); + let _ = fs::remove_dir_all(&temp_dir); + fs::create_dir_all(temp_dir.join("a/b/.venv")).unwrap(); + fs::create_dir_all(temp_dir.join("c/.venv")).unwrap(); + fs::create_dir_all(temp_dir.join(".venv")).unwrap(); + + let pattern = format!("{}/**/.venv", temp_dir.to_string_lossy()); + let result = expand_glob_pattern(&pattern); + + // Should find .venv at multiple levels (behavior depends on glob crate version) + assert!(!result.is_empty()); + assert!(result.iter().all(|p| p.ends_with(".venv"))); + + // Cleanup + let _ = fs::remove_dir_all(&temp_dir); + } + + #[test] + fn test_expand_glob_pattern_filename_patterns() { + // Create temp files for testing filename patterns like python_* and python.* + let temp_dir = std::env::temp_dir().join("pet_glob_test_filenames"); + let _ = fs::remove_dir_all(&temp_dir); + fs::create_dir_all(&temp_dir).unwrap(); + + // Create files matching python_* pattern + fs::write(temp_dir.join("python_foo"), "").unwrap(); + fs::write(temp_dir.join("python_bar"), "").unwrap(); + fs::write(temp_dir.join("python_3.12"), "").unwrap(); + fs::write(temp_dir.join("other_file"), "").unwrap(); + + // Test python_* pattern + let pattern = format!("{}/python_*", temp_dir.to_string_lossy()); + let result = expand_glob_pattern(&pattern); + + assert_eq!(result.len(), 3); + assert!(result.iter().any(|p| p.ends_with("python_foo"))); + assert!(result.iter().any(|p| p.ends_with("python_bar"))); + assert!(result.iter().any(|p| p.ends_with("python_3.12"))); + assert!(!result.iter().any(|p| p.ends_with("other_file"))); + + // Create files matching python.* pattern + fs::write(temp_dir.join("python.exe"), "").unwrap(); + fs::write(temp_dir.join("python.sh"), "").unwrap(); + fs::write(temp_dir.join("pythonrc"), "").unwrap(); + + // Test python.* pattern + let pattern = format!("{}/python.*", temp_dir.to_string_lossy()); + let result = expand_glob_pattern(&pattern); + + assert_eq!(result.len(), 2); + assert!(result.iter().any(|p| p.ends_with("python.exe"))); + assert!(result.iter().any(|p| p.ends_with("python.sh"))); + assert!(!result.iter().any(|p| p.ends_with("pythonrc"))); + + // Cleanup + let _ = fs::remove_dir_all(&temp_dir); + } +} diff --git a/crates/pet-fs/src/lib.rs b/crates/pet-fs/src/lib.rs index 6e626fd0..5ad4978c 100644 --- a/crates/pet-fs/src/lib.rs +++ b/crates/pet-fs/src/lib.rs @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +pub mod glob; pub mod path; diff --git a/crates/pet/src/jsonrpc.rs b/crates/pet/src/jsonrpc.rs index eaf87446..0097a254 100644 --- a/crates/pet/src/jsonrpc.rs +++ b/crates/pet/src/jsonrpc.rs @@ -21,6 +21,7 @@ use pet_core::{ Configuration, Locator, }; use pet_env_var_path::get_search_paths_from_env_variables; +use pet_fs::glob::expand_glob_patterns; use pet_jsonrpc::{ send_error, send_reply, server::{start_server, HandlersKeyedByMethodName}, @@ -92,11 +93,13 @@ pub fn start_jsonrpc_server() { #[serde(rename_all = "camelCase")] pub struct ConfigureOptions { /// These are paths like workspace folders, where we can look for environments. + /// Glob patterns are supported (e.g., "/home/user/projects/*"). pub workspace_directories: Option>, pub conda_executable: Option, pub poetry_executable: Option, /// Custom locations where environments can be found. Generally global locations where virtualenvs & the like can be found. /// Workspace directories should not be included into this list. + /// Glob patterns are supported (e.g., "/home/user/envs/*"). pub environment_directories: Option>, /// Directory to cache the Python environment details. pub cache_directory: Option, @@ -108,9 +111,22 @@ pub fn handle_configure(context: Arc, id: u32, params: Value) { // Start in a new thread, we can have multiple requests. thread::spawn(move || { let mut cfg = context.configuration.write().unwrap(); - cfg.workspace_directories = configure_options.workspace_directories; + // Expand glob patterns in workspace_directories + cfg.workspace_directories = configure_options.workspace_directories.map(|dirs| { + expand_glob_patterns(&dirs) + .into_iter() + .filter(|p| p.is_dir()) + .collect() + }); cfg.conda_executable = configure_options.conda_executable; - cfg.environment_directories = configure_options.environment_directories; + // Expand glob patterns in environment_directories + cfg.environment_directories = + configure_options.environment_directories.map(|dirs| { + expand_glob_patterns(&dirs) + .into_iter() + .filter(|p| p.is_dir()) + .collect() + }); cfg.poetry_executable = configure_options.poetry_executable; // We will not support changing the cache directories once set. // No point, supporting such a use case. @@ -142,6 +158,7 @@ pub struct RefreshOptions { /// If provided, then limit the search paths to these. /// Note: Search paths can also include Python exes or Python env folders. /// Traditionally, search paths are workspace folders. + /// Glob patterns are supported (e.g., "/home/user/*/venv", "**/.venv"). pub search_paths: Option>, } @@ -187,16 +204,23 @@ pub fn handle_refresh(context: Arc, id: u32, params: Value) { // Always clear this, as we will either serach in specified folder or a specific kind in global locations. config.workspace_directories = None; if let Some(search_paths) = refresh_options.search_paths { + // Expand any glob patterns in the search paths + let expanded_paths = expand_glob_patterns(&search_paths); + trace!( + "Expanded {} search paths to {} paths", + search_paths.len(), + expanded_paths.len() + ); // These workspace folders are only for this refresh. config.workspace_directories = Some( - search_paths + expanded_paths .iter() .filter(|p| p.is_dir()) .cloned() .collect(), ); config.executables = Some( - search_paths + expanded_paths .iter() .filter(|p| p.is_file()) .cloned() diff --git a/docs/JSONRPC.md b/docs/JSONRPC.md index e18daef1..dec64c64 100644 --- a/docs/JSONRPC.md +++ b/docs/JSONRPC.md @@ -37,6 +37,8 @@ interface ConfigureParams { * * If not provided, then environments such as poetry, pipenv, and the like will not be reported. * This is because poetry, pipenv, and the like are project specific enviornents. + * + * Glob patterns are supported (e.g., "/home/user/projects/*", "**/.venv"). */ workspaceDirectories?: string[]; /** @@ -44,6 +46,8 @@ interface ConfigureParams { * This is useful when the virtual environments are stored in some custom locations. * * Useful for VS Code so users can configure where they store virtual environments. + * + * Glob patterns are supported (e.g., "/home/user/envs/*", "/home/user/*/venv"). */ environmentDirectories?: string[]; /** @@ -95,6 +99,17 @@ interface RefreshParams { * Limits the search to a specific set of paths. * searchPaths can either by directories or Python prefixes/executables or combination of both. * Ignores workspace folders passed in configuration request. + * + * Glob patterns are supported: + * - `*` matches any sequence of characters in a path component + * - `?` matches any single character + * - `**` matches any sequence of path components (recursive) + * - `[...]` matches any character inside the brackets + * + * Examples: + * - "/home/user/projects/*" - all directories under projects + * - "/home/user/**/venv" - all venv directories recursively + * - "/home/user/project[0-9]" - project0, project1, etc. */ searchPaths?: string[]; } From f95de4de3e4600352dad2f623b6473a216195fcf Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Thu, 29 Jan 2026 11:53:52 -0800 Subject: [PATCH 2/2] revert conda meta history --- .../env_python_3/conda-meta/history | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/pet-conda/tests/unix/conda_env_without_manager_but_found_in_history/env_python_3/conda-meta/history b/crates/pet-conda/tests/unix/conda_env_without_manager_but_found_in_history/env_python_3/conda-meta/history index 0fd95448..fc09724d 100644 --- a/crates/pet-conda/tests/unix/conda_env_without_manager_but_found_in_history/env_python_3/conda-meta/history +++ b/crates/pet-conda/tests/unix/conda_env_without_manager_but_found_in_history/env_python_3/conda-meta/history @@ -1,8 +1,8 @@ ==> 2024-02-28 23:05:07 <== -# cmd: /home/kanadig/GIT/projects/python-environment-tools/crates/pet-conda/tests/unix/conda_env_without_manager_but_found_in_history/some_other_location/conda_install/bin/conda create -n conda1 +# cmd: /home/runner/work/python-environment-tools/python-environment-tools/crates/pet-conda/tests/unix/conda_env_without_manager_but_found_in_history/some_other_location/conda_install/bin/conda create -n conda1 # conda version: 23.11.0 ==> 2024-02-28 23:08:59 <== -# cmd: /home/kanadig/GIT/projects/python-environment-tools/crates/pet-conda/tests/unix/conda_env_without_manager_but_found_in_history/some_other_location/conda_install/bin/conda install -c conda-forge --name conda1 ipykernel -y +# cmd: /home/runner/work/python-environment-tools/python-environment-tools/crates/pet-conda/tests/unix/conda_env_without_manager_but_found_in_history/some_other_location/conda_install/bin/conda install -c conda-forge --name conda1 ipykernel -y # conda version: 23.11.0 +conda-forge/noarch::appnope-0.1.4-pyhd8ed1ab_0 +conda-forge/noarch::asttokens-2.4.1-pyhd8ed1ab_0