From cac716cea23e60d2e22c1c5e0449740315c27265 Mon Sep 17 00:00:00 2001 From: Nate Prewitt Date: Thu, 22 Jan 2026 13:22:28 -0700 Subject: [PATCH 1/8] Fix MSVC build for Azure --- cpp/src/arrow/filesystem/azurefs.cc | 7 +++++++ cpp/src/arrow/filesystem/azurefs.h | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 6580476d38c..da53f5f7d14 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -1363,6 +1363,7 @@ Result CheckIfHierarchicalNamespaceIsEnabled( directory_client.GetAccessControlList(); return HNSSupport::kEnabled; } catch (std::out_of_range& exception) { + ARROW_UNUSED(exception); // Azurite issue detected. DCHECK(IsDfsEmulator(options)); return HNSSupport::kDisabled; @@ -2500,6 +2501,7 @@ class AzureFileSystem::Impl { auto delete_result = deferred_response.GetResponse(); success = delete_result.Value.Deleted; } catch (const Core::RequestFailedException& exception) { + ARROW_UNUSED(exception); success = false; } if (!success) { @@ -3218,6 +3220,11 @@ class AzureFileSystem::Impl { std::atomic LeaseGuard::latest_known_expiry_time_ = SteadyClock::time_point{SteadyClock::duration::zero()}; +// Destructor must be defined here where Impl is a complete type. +// Defining it in the header (even as = default) causes MSVC to fail +// because it tries to instantiate std::default_delete before Impl is defined. +AzureFileSystem::~AzureFileSystem() {} + AzureFileSystem::AzureFileSystem(std::unique_ptr&& impl) : FileSystem(impl->io_context()), impl_(std::move(impl)) { default_async_is_sync_ = false; diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index ee0956afdd7..1a43153493f 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -251,7 +251,9 @@ class ARROW_EXPORT AzureFileSystem : public FileSystem { void ForceCachedHierarchicalNamespaceSupport(int hns_support); public: - ~AzureFileSystem() override = default; + // Destructor must be defined in the .cc file where Impl is complete, + // otherwise MSVC fails with "use of undefined type" for std::unique_ptr. + ~AzureFileSystem() override; static Result> Make( const AzureOptions& options, const io::IOContext& = io::default_io_context()); From 50d251114abeb7e429480b2c0c5688b52f56d5b0 Mon Sep 17 00:00:00 2001 From: Nate Prewitt Date: Thu, 22 Jan 2026 20:17:13 -0700 Subject: [PATCH 2/8] Add build tooling for Azure SDK --- .github/workflows/cpp_windows.yml | 2 ++ cpp/cmake_modules/ThirdpartyToolchain.cmake | 28 +++++++++++++++++++++ cpp/thirdparty/versions.txt | 3 +++ 3 files changed, 33 insertions(+) diff --git a/.github/workflows/cpp_windows.yml b/.github/workflows/cpp_windows.yml index 69bbfee28b9..3e1f2b4181e 100644 --- a/.github/workflows/cpp_windows.yml +++ b/.github/workflows/cpp_windows.yml @@ -41,12 +41,14 @@ jobs: runs-on: ${{ inputs.os }} timeout-minutes: 60 env: + ARROW_AZURE: ON ARROW_BOOST_USE_SHARED: OFF ARROW_BUILD_BENCHMARKS: ON ARROW_BUILD_SHARED: ON ARROW_BUILD_STATIC: OFF ARROW_BUILD_TESTS: ON ARROW_DATASET: ON + ARROW_FILESYSTEM: ON ARROW_FLIGHT: OFF ARROW_HDFS: ON ARROW_HOME: /usr diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index b95d6491457..37a1cffbecc 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -595,6 +595,13 @@ else() ) endif() +if(DEFINED ENV{ARROW_WIL_URL}) + set(ARROW_WIL_URL "$ENV{ARROW_WIL_URL}") +else() + set_urls(ARROW_WIL_URL + "https://github.com/microsoft/wil/archive/${ARROW_WIL_BUILD_VERSION}.tar.gz") +endif() + if(DEFINED ENV{ARROW_BOOST_URL}) set(BOOST_SOURCE_URL "$ENV{ARROW_BOOST_URL}") else() @@ -4054,6 +4061,27 @@ endif() function(build_azure_sdk) message(STATUS "Building Azure SDK for C++ from source") + + # On Windows, Azure SDK's WinHTTP transport requires WIL (Windows Implementation Libraries). + # Fetch WIL before Azure SDK so the WIL::WIL target is available. + if(WIN32) + message(STATUS "Fetching WIL (Windows Implementation Libraries) for Azure SDK") + fetchcontent_declare(wil + ${FC_DECLARE_COMMON_OPTIONS} + URL ${ARROW_WIL_URL} + URL_HASH "SHA256=${ARROW_WIL_BUILD_SHA256_CHECKSUM}") + set(WIL_BUILD_PACKAGING OFF) + set(WIL_BUILD_TESTS OFF) + fetchcontent_makeavailable(wil) + # Create a minimal config file so Azure SDK's find_package(wil CONFIG) succeeds. + # The WIL::WIL target already exists from FetchContent above. + file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/wil-config/wilConfig.cmake" + "# WIL loaded via FetchContent - target WIL::WIL already exists\n") + set(wil_DIR + "${CMAKE_CURRENT_BINARY_DIR}/wil-config" + CACHE PATH "" FORCE) + endif() + fetchcontent_declare(azure_sdk ${FC_DECLARE_COMMON_OPTIONS} URL ${ARROW_AZURE_SDK_URL} diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 442cde2c9c0..c39847ca6d4 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -56,6 +56,9 @@ ARROW_AWSSDK_BUILD_SHA256_CHECKSUM=b9944ba9905a68d6e53abb4f36ab2b3bd18ac88d85716 # Despite the confusing version name this is still the whole Azure SDK for C++ including core, keyvault, storage-common, etc. ARROW_AZURE_SDK_BUILD_VERSION=azure-identity_1.9.0 ARROW_AZURE_SDK_BUILD_SHA256_CHECKSUM=97065bfc971ac8df450853ce805f820f52b59457bd7556510186a1569502e4a1 +# WIL (Windows Implementation Libraries) is required by Azure SDK on Windows for WinHTTP transport +ARROW_WIL_BUILD_VERSION=v1.0.250325.1 +ARROW_WIL_BUILD_SHA256_CHECKSUM=c9e667d5f86ded43d17b5669d243e95ca7b437e3a167c170805ffd4aa8a9a786 ARROW_BOOST_BUILD_VERSION=1.88.0 ARROW_BOOST_BUILD_SHA256_CHECKSUM=dcea50f40ba1ecfc448fdf886c0165cf3e525fef2c9e3e080b9804e8117b9694 ARROW_BROTLI_BUILD_VERSION=v1.0.9 From 1f9b9886d69972a038cacec1ed9aa0769bddf703 Mon Sep 17 00:00:00 2001 From: Nate Prewitt Date: Mon, 26 Jan 2026 11:02:23 -0700 Subject: [PATCH 3/8] Update cpp/src/arrow/filesystem/azurefs.cc Co-authored-by: Antoine Pitrou --- cpp/src/arrow/filesystem/azurefs.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index da53f5f7d14..93a3f661b27 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -1362,8 +1362,7 @@ Result CheckIfHierarchicalNamespaceIsEnabled( // without hierarchical namespace enabled. directory_client.GetAccessControlList(); return HNSSupport::kEnabled; - } catch (std::out_of_range& exception) { - ARROW_UNUSED(exception); +} catch (const std::out_of_range&) { // Azurite issue detected. DCHECK(IsDfsEmulator(options)); return HNSSupport::kDisabled; From 2ba5b3d037ef686c19cc445232629fdfa2997b6c Mon Sep 17 00:00:00 2001 From: Nate Prewitt Date: Mon, 26 Jan 2026 11:04:32 -0700 Subject: [PATCH 4/8] Update cpp/cmake_modules/ThirdpartyToolchain.cmake Co-authored-by: Sutou Kouhei --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 37a1cffbecc..850943befe6 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4070,6 +4070,7 @@ function(build_azure_sdk) ${FC_DECLARE_COMMON_OPTIONS} URL ${ARROW_WIL_URL} URL_HASH "SHA256=${ARROW_WIL_BUILD_SHA256_CHECKSUM}") + prepare_fetchcontent() set(WIL_BUILD_PACKAGING OFF) set(WIL_BUILD_TESTS OFF) fetchcontent_makeavailable(wil) From a81ac5533b524e422b2e3c47508f7e45c0cf53ee Mon Sep 17 00:00:00 2001 From: Nate Prewitt Date: Mon, 26 Jan 2026 13:39:44 -0700 Subject: [PATCH 5/8] Use OVERRIDE_FIND_PACKAGE to address Azure SDK WIL requirement --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 850943befe6..eb3c94e89d5 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4067,20 +4067,13 @@ function(build_azure_sdk) if(WIN32) message(STATUS "Fetching WIL (Windows Implementation Libraries) for Azure SDK") fetchcontent_declare(wil - ${FC_DECLARE_COMMON_OPTIONS} + ${FC_DECLARE_COMMON_OPTIONS} OVERRIDE_FIND_PACKAGE URL ${ARROW_WIL_URL} URL_HASH "SHA256=${ARROW_WIL_BUILD_SHA256_CHECKSUM}") prepare_fetchcontent() set(WIL_BUILD_PACKAGING OFF) set(WIL_BUILD_TESTS OFF) fetchcontent_makeavailable(wil) - # Create a minimal config file so Azure SDK's find_package(wil CONFIG) succeeds. - # The WIL::WIL target already exists from FetchContent above. - file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/wil-config/wilConfig.cmake" - "# WIL loaded via FetchContent - target WIL::WIL already exists\n") - set(wil_DIR - "${CMAKE_CURRENT_BINARY_DIR}/wil-config" - CACHE PATH "" FORCE) endif() fetchcontent_declare(azure_sdk From 3f5cea2bc380edc50aaeb0eff65660c07593ae4e Mon Sep 17 00:00:00 2001 From: Nate Prewitt Date: Mon, 26 Jan 2026 13:46:12 -0700 Subject: [PATCH 6/8] Alphabetize variables and add them to DEPENDENCIES --- cpp/thirdparty/versions.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index c39847ca6d4..d94bf652ee8 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -56,9 +56,6 @@ ARROW_AWSSDK_BUILD_SHA256_CHECKSUM=b9944ba9905a68d6e53abb4f36ab2b3bd18ac88d85716 # Despite the confusing version name this is still the whole Azure SDK for C++ including core, keyvault, storage-common, etc. ARROW_AZURE_SDK_BUILD_VERSION=azure-identity_1.9.0 ARROW_AZURE_SDK_BUILD_SHA256_CHECKSUM=97065bfc971ac8df450853ce805f820f52b59457bd7556510186a1569502e4a1 -# WIL (Windows Implementation Libraries) is required by Azure SDK on Windows for WinHTTP transport -ARROW_WIL_BUILD_VERSION=v1.0.250325.1 -ARROW_WIL_BUILD_SHA256_CHECKSUM=c9e667d5f86ded43d17b5669d243e95ca7b437e3a167c170805ffd4aa8a9a786 ARROW_BOOST_BUILD_VERSION=1.88.0 ARROW_BOOST_BUILD_SHA256_CHECKSUM=dcea50f40ba1ecfc448fdf886c0165cf3e525fef2c9e3e080b9804e8117b9694 ARROW_BROTLI_BUILD_VERSION=v1.0.9 @@ -117,6 +114,9 @@ ARROW_THRIFT_BUILD_VERSION=0.22.0 ARROW_THRIFT_BUILD_SHA256_CHECKSUM=794a0e455787960d9f27ab92c38e34da27e8deeda7a5db0e59dc64a00df8a1e5 ARROW_UTF8PROC_BUILD_VERSION=v2.10.0 ARROW_UTF8PROC_BUILD_SHA256_CHECKSUM=6f4f1b639daa6dca9f80bc5db1233e9cbaa31a67790887106160b33ef743f136 +# WIL (Windows Implementation Libraries) is required by Azure SDK on Windows for WinHTTP transport +ARROW_WIL_BUILD_VERSION=v1.0.250325.1 +ARROW_WIL_BUILD_SHA256_CHECKSUM=c9e667d5f86ded43d17b5669d243e95ca7b437e3a167c170805ffd4aa8a9a786 ARROW_XSIMD_BUILD_VERSION=14.0.0 ARROW_XSIMD_BUILD_SHA256_CHECKSUM=17de0236954955c10c09d6938d4c5f3a3b92d31be5dadd1d5d09fc1b15490dce ARROW_ZLIB_BUILD_VERSION=1.3.1 @@ -145,6 +145,7 @@ DEPENDENCIES=( "ARROW_AWS_CRT_CPP_URL aws-crt-cpp-${ARROW_AWS_CRT_CPP_BUILD_VERSION}.tar.gz https://github.com/awslabs/aws-crt-cpp/archive/${ARROW_AWS_CRT_CPP_BUILD_VERSION}.tar.gz" "ARROW_AWS_LC_URL aws-lc-${ARROW_AWS_LC_BUILD_VERSION}.tar.gz https://github.com/awslabs/aws-lc/archive/${ARROW_AWS_LC_BUILD_VERSION}.tar.gz" "ARROW_AWSSDK_URL aws-sdk-cpp-${ARROW_AWSSDK_BUILD_VERSION}.tar.gz https://github.com/aws/aws-sdk-cpp/archive/${ARROW_AWSSDK_BUILD_VERSION}.tar.gz" + "ARROW_AZURE_SDK_URL azure-sdk-for-cpp-${ARROW_AZURE_SDK_BUILD_VERSION}.tar.gz https://github.com/Azure/azure-sdk-for-cpp/archive/${ARROW_AZURE_SDK_BUILD_VERSION}.tar.gz" "ARROW_BOOST_URL boost-${ARROW_BOOST_BUILD_VERSION}-cmake.tar.gz https://github.com/boostorg/boost/releases/download/boost-${ARROW_BOOST_BUILD_VERSION}/boost-${ARROW_BOOST_BUILD_VERSION}-cmake.tar.gz" "ARROW_BROTLI_URL brotli-${ARROW_BROTLI_BUILD_VERSION}.tar.gz https://github.com/google/brotli/archive/${ARROW_BROTLI_BUILD_VERSION}.tar.gz" "ARROW_BZIP2_URL bzip2-${ARROW_BZIP2_BUILD_VERSION}.tar.gz https://sourceware.org/pub/bzip2/bzip2-${ARROW_BZIP2_BUILD_VERSION}.tar.gz" @@ -171,6 +172,7 @@ DEPENDENCIES=( "ARROW_SUBSTRAIT_URL substrait-${ARROW_SUBSTRAIT_BUILD_VERSION}.tar.gz https://github.com/substrait-io/substrait/archive/${ARROW_SUBSTRAIT_BUILD_VERSION}.tar.gz" "ARROW_THRIFT_URL thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz https://www.apache.org/dyn/closer.lua/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz?action=download" "ARROW_UTF8PROC_URL utf8proc-${ARROW_UTF8PROC_BUILD_VERSION}.tar.gz https://github.com/JuliaStrings/utf8proc/archive/${ARROW_UTF8PROC_BUILD_VERSION}.tar.gz" + "ARROW_WIL_URL wil-${ARROW_WIL_BUILD_VERSION}.tar.gz https://github.com/microsoft/wil/archive/refs/tags/${ARROW_WIL_BUILD_VERSION}.tar.gz" "ARROW_XSIMD_URL xsimd-${ARROW_XSIMD_BUILD_VERSION}.tar.gz https://github.com/xtensor-stack/xsimd/archive/${ARROW_XSIMD_BUILD_VERSION}.tar.gz" "ARROW_ZLIB_URL zlib-${ARROW_ZLIB_BUILD_VERSION}.tar.gz https://zlib.net/fossils/zlib-${ARROW_ZLIB_BUILD_VERSION}.tar.gz" "ARROW_ZSTD_URL zstd-${ARROW_ZSTD_BUILD_VERSION}.tar.gz https://github.com/facebook/zstd/releases/download/v${ARROW_ZSTD_BUILD_VERSION}/zstd-${ARROW_ZSTD_BUILD_VERSION}.tar.gz" From 7754f6df50a3497ea51fbf5a083ab531aa409cde Mon Sep 17 00:00:00 2001 From: Nate Prewitt Date: Mon, 26 Jan 2026 13:47:08 -0700 Subject: [PATCH 7/8] Remove unneeded comments and use = default for destructor --- cpp/src/arrow/filesystem/azurefs.cc | 10 +++------- cpp/src/arrow/filesystem/azurefs.h | 2 -- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 93a3f661b27..4d2d353136d 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -1362,7 +1362,7 @@ Result CheckIfHierarchicalNamespaceIsEnabled( // without hierarchical namespace enabled. directory_client.GetAccessControlList(); return HNSSupport::kEnabled; -} catch (const std::out_of_range&) { + } catch (const std::out_of_range&) { // Azurite issue detected. DCHECK(IsDfsEmulator(options)); return HNSSupport::kDisabled; @@ -2499,8 +2499,7 @@ class AzureFileSystem::Impl { try { auto delete_result = deferred_response.GetResponse(); success = delete_result.Value.Deleted; - } catch (const Core::RequestFailedException& exception) { - ARROW_UNUSED(exception); + } catch (const Core::RequestFailedException&) { success = false; } if (!success) { @@ -3219,10 +3218,7 @@ class AzureFileSystem::Impl { std::atomic LeaseGuard::latest_known_expiry_time_ = SteadyClock::time_point{SteadyClock::duration::zero()}; -// Destructor must be defined here where Impl is a complete type. -// Defining it in the header (even as = default) causes MSVC to fail -// because it tries to instantiate std::default_delete before Impl is defined. -AzureFileSystem::~AzureFileSystem() {} +AzureFileSystem::~AzureFileSystem() = default; AzureFileSystem::AzureFileSystem(std::unique_ptr&& impl) : FileSystem(impl->io_context()), impl_(std::move(impl)) { diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 1a43153493f..ae374d487b1 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -251,8 +251,6 @@ class ARROW_EXPORT AzureFileSystem : public FileSystem { void ForceCachedHierarchicalNamespaceSupport(int hns_support); public: - // Destructor must be defined in the .cc file where Impl is complete, - // otherwise MSVC fails with "use of undefined type" for std::unique_ptr. ~AzureFileSystem() override; static Result> Make( From c50396cbd73553331746e7bf475622dbaeabbb30 Mon Sep 17 00:00:00 2001 From: Nate Prewitt Date: Mon, 26 Jan 2026 23:47:17 -0700 Subject: [PATCH 8/8] Change kMaxBlockSizeBytes to Long Long for cross platform compatibility --- cpp/src/arrow/filesystem/azurefs.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 4d2d353136d..5500d1845b0 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -970,7 +970,7 @@ Status StageBlock(Blobs::BlockBlobClient* block_blob_client, const std::string& /// Writes will be buffered up to this size (in bytes) before actually uploading them. static constexpr int64_t kBlockUploadSizeBytes = 10 * 1024 * 1024; /// The maximum size of a block in Azure Blob (as per docs). -static constexpr int64_t kMaxBlockSizeBytes = 4UL * 1024 * 1024 * 1024; +static constexpr int64_t kMaxBlockSizeBytes = 4LL * 1024 * 1024 * 1024; /// This output stream, similar to other arrow OutputStreams, is not thread-safe. class ObjectAppendStream final : public io::OutputStream {