From 476e96ac08af0801f230ad10c88ca4ebad2abc7f Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 30 Jan 2026 12:17:18 +0100 Subject: [PATCH 01/12] Refactor x86 CPU features --- include/xsimd/config/xsimd_config.hpp | 11 + include/xsimd/config/xsimd_cpuid.hpp | 162 +++------- include/xsimd/types/xsimd_all_registers.hpp | 2 + include/xsimd/xsimd_cpu_features_x86.hpp | 321 ++++++++++++++++++++ 4 files changed, 371 insertions(+), 125 deletions(-) create mode 100644 include/xsimd/xsimd_cpu_features_x86.hpp diff --git a/include/xsimd/config/xsimd_config.hpp b/include/xsimd/config/xsimd_config.hpp index 58ce53462..aead21c45 100644 --- a/include/xsimd/config/xsimd_config.hpp +++ b/include/xsimd/config/xsimd_config.hpp @@ -33,6 +33,17 @@ * @defgroup xsimd_config_macro Instruction Set Detection */ +/** + * @ingroup xsimd_config_macro + * + * Set to 1 if the target is the x86 architecture family. + */ +#if defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86) +#define XSIMD_TARGET_X86 1 +#else +#define XSIMD_TARGET_X86 0 +#endif + /** * @ingroup xsimd_config_macro * diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp index 1be4f018a..8a969db16 100644 --- a/include/xsimd/config/xsimd_cpuid.hpp +++ b/include/xsimd/config/xsimd_cpuid.hpp @@ -12,9 +12,12 @@ #ifndef XSIMD_CPUID_HPP #define XSIMD_CPUID_HPP -#include #include +#include "../types/xsimd_all_registers.hpp" +#include "../xsimd_cpu_features_x86.hpp" +#include "xsimd_inline.hpp" + #if defined(__linux__) && (defined(__ARM_NEON) || defined(_M_ARM) || defined(__riscv_vector)) #include #include @@ -25,13 +28,6 @@ #endif -#if defined(_MSC_VER) -// Contains the definition of __cpuidex -#include -#endif - -#include "../types/xsimd_all_registers.hpp" - namespace xsimd { namespace detail @@ -122,138 +118,54 @@ namespace xsimd #endif rvv = bool(getauxval(AT_HWCAP) & HWCAP_V); #endif - -#elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86) - - auto get_xcr0_low = []() noexcept - { - uint32_t xcr0; - -#if defined(_MSC_VER) && _MSC_VER >= 1400 - - xcr0 = (uint32_t)_xgetbv(0); - -#elif defined(__GNUC__) - - __asm__( - "xorl %%ecx, %%ecx\n" - "xgetbv\n" - : "=a"(xcr0) - : -#if defined(__i386__) - : "ecx", "edx" -#else - : "rcx", "rdx" -#endif - ); - -#else /* _MSC_VER < 1400 */ -#error "_MSC_VER < 1400 is not supported" -#endif /* _MSC_VER && _MSC_VER >= 1400 */ - return xcr0; - }; - - auto get_cpuid = [](int reg[4], int level, int count = 0) noexcept - { - -#if defined(_MSC_VER) - __cpuidex(reg, level, count); - -#elif defined(__INTEL_COMPILER) - __cpuid(reg, level); - -#elif defined(__GNUC__) || defined(__clang__) - -#if defined(__i386__) && defined(__PIC__) - // %ebx may be the PIC register - __asm__("xchg{l}\t{%%}ebx, %1\n\t" - "cpuid\n\t" - "xchg{l}\t{%%}ebx, %1\n\t" - : "=a"(reg[0]), "=r"(reg[1]), "=c"(reg[2]), "=d"(reg[3]) - : "0"(level), "2"(count)); - -#else - __asm__("cpuid\n\t" - : "=a"(reg[0]), "=b"(reg[1]), "=c"(reg[2]), "=d"(reg[3]) - : "0"(level), "2"(count)); -#endif - -#else -#error "Unsupported configuration" #endif - }; - - int regs1[4]; - - get_cpuid(regs1, 0x1); - - // OS can explicitly disable the usage of SSE/AVX extensions - // by setting an appropriate flag in CR0 register - // - // https://docs.kernel.org/admin-guide/hw-vuln/gather_data_sampling.html + const auto cpuid = xsimd::x86_cpu_id::read(); + auto xcr0 = xsimd::x86_xcr0::make_false(); - unsigned sse_state_os_enabled = 1; + bool sse_state_os_enabled = true; // AVX and AVX512 strictly require OSXSAVE to be enabled by the OS. // If OSXSAVE is disabled (e.g., via bcdedit /set xsavedisable 1), // AVX state won't be preserved across context switches, so AVX cannot be used. - unsigned avx_state_os_enabled = 0; - unsigned avx512_state_os_enabled = 0; + bool avx_state_os_enabled = false; + bool avx512_state_os_enabled = false; - // OSXSAVE: A value of 1 indicates that the OS has set CR4.OSXSAVE[bit - // 18] to enable XSETBV/XGETBV instructions to access XCR0 and - // to support processor extended state management using - // XSAVE/XRSTOR. - bool osxsave = regs1[2] >> 27 & 1; - if (osxsave) + if (cpuid.osxsave()) { + xcr0 = xsimd::x86_xcr0::read(); - uint32_t xcr0 = get_xcr0_low(); - - sse_state_os_enabled = xcr0 >> 1 & 1; - avx_state_os_enabled = xcr0 >> 2 & sse_state_os_enabled; - avx512_state_os_enabled = xcr0 >> 6 & avx_state_os_enabled; + sse_state_os_enabled = xcr0.sse_state_os_enabled(); + avx_state_os_enabled = xcr0.avx_state_os_enabled(); + avx512_state_os_enabled = xcr0.avx512_state_os_enabled(); } - sse2 = regs1[3] >> 26 & sse_state_os_enabled; - sse3 = regs1[2] >> 0 & sse_state_os_enabled; - ssse3 = regs1[2] >> 9 & sse_state_os_enabled; - sse4_1 = regs1[2] >> 19 & sse_state_os_enabled; - sse4_2 = regs1[2] >> 20 & sse_state_os_enabled; - fma3_sse42 = regs1[2] >> 12 & sse_state_os_enabled; - - avx = regs1[2] >> 28 & avx_state_os_enabled; - fma3_avx = avx && fma3_sse42; - - int regs8[4]; - get_cpuid(regs8, 0x80000001); - fma4 = regs8[2] >> 16 & avx_state_os_enabled; - - // sse4a = regs[2] >> 6 & 1; + sse2 = cpuid.sse2() && sse_state_os_enabled; + sse3 = cpuid.sse3() && sse_state_os_enabled; + ssse3 = cpuid.ssse3() && sse_state_os_enabled; + sse4_1 = cpuid.sse4_1() && sse_state_os_enabled; + sse4_2 = cpuid.sse4_2() && sse_state_os_enabled; + fma3_sse42 = cpuid.fma3() && sse_state_os_enabled; - // xop = regs[2] >> 11 & 1; - - int regs7[4]; - get_cpuid(regs7, 0x7); - avx2 = regs7[1] >> 5 & avx_state_os_enabled; - - int regs7a[4]; - get_cpuid(regs7a, 0x7, 0x1); - avxvnni = regs7a[0] >> 4 & avx_state_os_enabled; + // sse4a not implemented in cpu_id yet + // xop not implemented in cpu_id yet + avx = cpuid.avx() && avx_state_os_enabled; + fma3_avx = avx && fma3_sse42; + fma4 = cpuid.fma4() && avx_state_os_enabled; + avx2 = cpuid.avx2() && avx_state_os_enabled; + avxvnni = cpuid.avxvnni() && avx_state_os_enabled; fma3_avx2 = avx2 && fma3_sse42; - avx512f = regs7[1] >> 16 & avx512_state_os_enabled; - avx512cd = regs7[1] >> 28 & avx512_state_os_enabled; - avx512dq = regs7[1] >> 17 & avx512_state_os_enabled; - avx512bw = regs7[1] >> 30 & avx512_state_os_enabled; - avx512er = regs7[1] >> 27 & avx512_state_os_enabled; - avx512pf = regs7[1] >> 26 & avx512_state_os_enabled; - avx512ifma = regs7[1] >> 21 & avx512_state_os_enabled; - avx512vbmi = regs7[2] >> 1 & avx512_state_os_enabled; - avx512vbmi2 = regs7[2] >> 6 & avx512_state_os_enabled; - avx512vnni_bw = regs7[2] >> 11 & avx512_state_os_enabled; + avx512f = cpuid.avx512f() && avx512_state_os_enabled; + avx512cd = cpuid.avx512cd() && avx512_state_os_enabled; + avx512dq = cpuid.avx512dq() && avx512_state_os_enabled; + avx512bw = cpuid.avx512bw() && avx512_state_os_enabled; + avx512er = cpuid.avx512er() && avx512_state_os_enabled; + avx512pf = cpuid.avx512pf() && avx512_state_os_enabled; + avx512ifma = cpuid.avx512ifma() && avx512_state_os_enabled; + avx512vbmi = cpuid.avx512vbmi() && avx512_state_os_enabled; + avx512vbmi2 = cpuid.avx512vbmi2() && avx512_state_os_enabled; + avx512vnni_bw = cpuid.avx512vnni_bw() && avx512_state_os_enabled; avx512vnni_vbmi2 = avx512vbmi2 && avx512vnni_bw; -#endif } }; } // namespace detail diff --git a/include/xsimd/types/xsimd_all_registers.hpp b/include/xsimd/types/xsimd_all_registers.hpp index 33f9b465d..da87df2e4 100644 --- a/include/xsimd/types/xsimd_all_registers.hpp +++ b/include/xsimd/types/xsimd_all_registers.hpp @@ -9,6 +9,8 @@ * The full license is in the file LICENSE, distributed with this software. * ****************************************************************************/ +#include "../config/xsimd_inline.hpp" + #include "xsimd_fma3_sse_register.hpp" #include "xsimd_fma4_register.hpp" #include "xsimd_sse2_register.hpp" diff --git a/include/xsimd/xsimd_cpu_features_x86.hpp b/include/xsimd/xsimd_cpu_features_x86.hpp new file mode 100644 index 000000000..185c08c30 --- /dev/null +++ b/include/xsimd/xsimd_cpu_features_x86.hpp @@ -0,0 +1,321 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_CPU_FEATURES_X86_HPP +#define XSIMD_CPU_FEATURES_X86_HPP + +#include + +#include "./config/xsimd_config.hpp" + +#if XSIMD_TARGET_X86 && defined(_MSC_VER) +// Contains the definition of __cpuidex +#include +#endif + +namespace xsimd +{ + namespace detail + { + template + constexpr I make_bit_mask(I bit) + { + return static_cast(I { 1 } << bit); + } + + template + constexpr I make_bit_mask(I bit, Args... bits) + { + return make_bit_mask(bit) | make_bit_mask(static_cast(bits)...); + } + + template + constexpr bool bit_is_set(I value) + { + constexpr I mask = make_bit_mask(static_cast(Bits)...); + return (value & mask) == mask; + } + + inline void get_cpuid(int reg[4], int level, int count = 0) noexcept; + + inline std::uint32_t get_xcr0_low() noexcept; + } + + /* + * Extended Control Register 0 (XCR0). + * + * Operating systems can explicitly disable the usage of instruction set (such + * as SSE or AVX extensions) by setting an appropriate flag in XCR0 register. + * This utility parses such bit values. + * + * @see https://docs.kernel.org/admin-guide/hw-vuln/gather_data_sampling.html + */ + class x86_xcr0 + { + public: + using reg_t = std::uint32_t; + + /** Parse a XCR0 value into individual components. */ + constexpr explicit x86_xcr0(reg_t low) noexcept + : m_low(low) + { + } + + /** Create an object that has all features set to false. */ + static constexpr x86_xcr0 make_false() + { + return x86_xcr0(0); + } + + /** Read the XCR0 register from the CPU if on the correct architecture. */ + static x86_xcr0 read() + { + return x86_xcr0(detail::get_xcr0_low()); + } + + constexpr bool sse_state_os_enabled() const noexcept + { + return detail::bit_is_set<1>(m_low); + } + + constexpr bool avx_state_os_enabled() const noexcept + { + // Check both SSE and AVX bits even though AVX must imply SSE + return detail::bit_is_set<1, 2>(m_low); + } + + constexpr bool avx512_state_os_enabled() const noexcept + { + // Check all SSE, AVX, and AVX52 bits even though AVX512 must + // imply AVX and SSE + return detail::bit_is_set<1, 2, 6>(m_low); + } + + private: + std::uint32_t m_low = {}; + }; + + class x86_cpu_id + { + public: + struct cpu_id_regs + { + using reg_t = int[4]; + + reg_t reg1 = {}; + reg_t reg7 = {}; + reg_t reg7a = {}; + reg_t reg8 = {}; + }; + + /** Parse CpuInfo register values into individual components. */ + constexpr explicit x86_cpu_id(const cpu_id_regs& regs) noexcept + : m_regs(regs) + { + } + + /** + * Read the CpuId registers from the CPU if on the correct architecture. + * + * This is only safe to call if bit 18 of CR4.OSXSAVE has been set. + * + * @see cpu_id::osxsave + */ + static x86_cpu_id read() + { + cpu_id_regs regs = {}; + detail::get_cpuid(regs.reg1, 0x1); + detail::get_cpuid(regs.reg7, 0x7); + detail::get_cpuid(regs.reg7a, 0x7, 0x1); + detail::get_cpuid(regs.reg8, 0x80000001); + return x86_cpu_id(regs); + } + + constexpr bool sse2() const noexcept + { + return detail::bit_is_set<26>(m_regs.reg1[3]); + } + + constexpr bool sse3() const noexcept + { + return detail::bit_is_set<0>(m_regs.reg1[2]); + } + + constexpr bool ssse3() const noexcept + { + return detail::bit_is_set<9>(m_regs.reg1[2]); + } + + constexpr bool sse4_1() const noexcept + { + return detail::bit_is_set<19>(m_regs.reg1[2]); + } + + constexpr bool sse4_2() const noexcept + { + return detail::bit_is_set<20>(m_regs.reg1[2]); + } + + constexpr bool fma3() const noexcept + { + return detail::bit_is_set<12>(m_regs.reg1[2]); + } + + /** + * Indicates whether the OS has enabled extended state management. + * + * When true, the OS has set bit 18 (OSXSAVE) in the CR4 control register, + * enabling the XGETBV/XSETBV instructions to access XCR0 and support + * processor extended state management using XSAVE/XRSTOR. + * + * This value is read from CPUID leaf 0x1, ECX bit 27, which reflects + * the state of CR4.OSXSAVE. + */ + constexpr bool osxsave() const noexcept + { + return detail::bit_is_set<27>(m_regs.reg1[2]); + } + + constexpr bool avx() const noexcept + { + return detail::bit_is_set<28>(m_regs.reg1[2]); + } + + constexpr bool avx2() const noexcept + { + return detail::bit_is_set<5>(m_regs.reg7[1]); + } + + constexpr bool avx512f() const noexcept + { + return detail::bit_is_set<16>(m_regs.reg7[1]); + } + + constexpr bool avx512dq() const noexcept + { + return detail::bit_is_set<17>(m_regs.reg7[1]); + } + + constexpr bool avx512ifma() const noexcept + { + return detail::bit_is_set<21>(m_regs.reg7[1]); + } + + constexpr bool avx512pf() const noexcept + { + return detail::bit_is_set<26>(m_regs.reg7[1]); + } + + constexpr bool avx512er() const noexcept + { + return detail::bit_is_set<27>(m_regs.reg7[1]); + } + + constexpr bool avx512cd() const noexcept + { + return detail::bit_is_set<28>(m_regs.reg7[1]); + } + + constexpr bool avx512bw() const noexcept + { + return detail::bit_is_set<30>(m_regs.reg7[1]); + } + + constexpr bool avx512vbmi() const noexcept + { + return detail::bit_is_set<1>(m_regs.reg7[2]); + } + + constexpr bool avx512vbmi2() const noexcept + { + return detail::bit_is_set<6>(m_regs.reg7[2]); + } + + constexpr bool avx512vnni_bw() const noexcept + { + return detail::bit_is_set<11>(m_regs.reg7[2]); + } + + constexpr bool avxvnni() const noexcept + { + return detail::bit_is_set<4>(m_regs.reg7a[0]); + } + + constexpr bool fma4() const noexcept + { + return detail::bit_is_set<16>(m_regs.reg8[2]); + } + + private: + cpu_id_regs m_regs = {}; + }; + + namespace detail + { + inline void get_cpuid(int reg[4], int level, int count) noexcept + { +#if !XSIMD_TARGET_X86 + reg = {}; // Fill zeros + +#elif defined(_MSC_VER) + __cpuidex(reg, level, count); + +#elif defined(__INTEL_COMPILER) + __cpuid(reg, level); + +#elif defined(__GNUC__) || defined(__clang__) + +#if defined(__i386__) && defined(__PIC__) + // %ebx may be the PIC register + __asm__("xchg{l}\t{%%}ebx, %1\n\t" + "cpuid\n\t" + "xchg{l}\t{%%}ebx, %1\n\t" + : "=a"(reg[0]), "=r"(reg[1]), "=c"(reg[2]), "=d"(reg[3]) + : "0"(level), "2"(count)); + +#else + __asm__("cpuid\n\t" + : "=a"(reg[0]), "=b"(reg[1]), "=c"(reg[2]), "=d"(reg[3]) + : "0"(level), "2"(count)); +#endif +#endif + } + + inline std::uint32_t get_xcr0_low() noexcept + { +#if !XSIMD_TARGET_X86 + return {}; // return 0; + +#elif defined(_MSC_VER) && _MSC_VER >= 1400 + return static_cast(_xgetbv(0)); + +#elif defined(__GNUC__) + std::uint32_t xcr0 = {}; + __asm__( + "xorl %%ecx, %%ecx\n" + "xgetbv\n" + : "=a"(xcr0) + : +#if defined(__i386__) + : "ecx", "edx" +#else + : "rcx", "rdx" +#endif + ); + return xcr0; + +#else /* _MSC_VER < 1400 */ +#error "_MSC_VER < 1400 is not supported" +#endif /* _MSC_VER && _MSC_VER >= 1400 */ + }; + } +} +#endif From 99a5a29acc8530ef0a5994dc2d64e719008953a1 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 30 Jan 2026 12:23:06 +0100 Subject: [PATCH 02/12] Remove unsafe memset --- include/xsimd/config/xsimd_cpuid.hpp | 6 +----- include/xsimd/types/xsimd_common_arch.hpp | 2 ++ 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp index 8a969db16..c64bfa6f8 100644 --- a/include/xsimd/config/xsimd_cpuid.hpp +++ b/include/xsimd/config/xsimd_cpuid.hpp @@ -12,8 +12,6 @@ #ifndef XSIMD_CPUID_HPP #define XSIMD_CPUID_HPP -#include - #include "../types/xsimd_all_registers.hpp" #include "../xsimd_cpu_features_x86.hpp" #include "xsimd_inline.hpp" @@ -36,7 +34,7 @@ namespace xsimd { #define ARCH_FIELD_EX(arch, field_name) \ - unsigned field_name; \ + unsigned field_name = 0; \ XSIMD_INLINE bool has(::xsimd::arch) const { return this->field_name; } #define ARCH_FIELD_EX_REUSE(arch, field_name) \ @@ -86,8 +84,6 @@ namespace xsimd XSIMD_INLINE supported_arch() noexcept { - memset(this, 0, sizeof(supported_arch)); - #if XSIMD_WITH_WASM wasm = 1; #endif diff --git a/include/xsimd/types/xsimd_common_arch.hpp b/include/xsimd/types/xsimd_common_arch.hpp index 28491aeda..a33c868ea 100644 --- a/include/xsimd/types/xsimd_common_arch.hpp +++ b/include/xsimd/types/xsimd_common_arch.hpp @@ -12,6 +12,8 @@ #ifndef XSIMD_COMMON_ARCH_HPP #define XSIMD_COMMON_ARCH_HPP +#include + #include "../config/xsimd_config.hpp" /** From 67162fa0ceba07ca2dd193e6fa852000118bae10 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 30 Jan 2026 12:35:20 +0100 Subject: [PATCH 03/12] Fix warning --- include/xsimd/xsimd_cpu_features_x86.hpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/xsimd/xsimd_cpu_features_x86.hpp b/include/xsimd/xsimd_cpu_features_x86.hpp index 185c08c30..348c8b2c8 100644 --- a/include/xsimd/xsimd_cpu_features_x86.hpp +++ b/include/xsimd/xsimd_cpu_features_x86.hpp @@ -263,7 +263,12 @@ namespace xsimd inline void get_cpuid(int reg[4], int level, int count) noexcept { #if !XSIMD_TARGET_X86 - reg = {}; // Fill zeros + reg[0] = 0; + reg[1] = 0; + reg[2] = 0; + reg[3] = 0; + (void)level; + (void)count; #elif defined(_MSC_VER) __cpuidex(reg, level, count); From 3ca912bae92f881df07bf204f6fac1c4fb12351a Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 30 Jan 2026 12:40:15 +0100 Subject: [PATCH 04/12] Fix inline --- include/xsimd/xsimd_cpu_features_x86.hpp | 4 ++-- test/check_inline_specifier.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/xsimd/xsimd_cpu_features_x86.hpp b/include/xsimd/xsimd_cpu_features_x86.hpp index 348c8b2c8..99e6b6eb3 100644 --- a/include/xsimd/xsimd_cpu_features_x86.hpp +++ b/include/xsimd/xsimd_cpu_features_x86.hpp @@ -76,7 +76,7 @@ namespace xsimd } /** Read the XCR0 register from the CPU if on the correct architecture. */ - static x86_xcr0 read() + inline static x86_xcr0 read() { return x86_xcr0(detail::get_xcr0_low()); } @@ -129,7 +129,7 @@ namespace xsimd * * @see cpu_id::osxsave */ - static x86_cpu_id read() + inline static x86_cpu_id read() { cpu_id_regs regs = {}; detail::get_cpuid(regs.reg1, 0x1); diff --git a/test/check_inline_specifier.sh b/test/check_inline_specifier.sh index 1ccdda130..2337b3707 100755 --- a/test/check_inline_specifier.sh +++ b/test/check_inline_specifier.sh @@ -3,7 +3,7 @@ # Usage: $0 top_srcdir # # This script walks all headers in $top_srcdir/include and makes sure that all -# functions declared tehre are marked as inline or constexpr (which implies +# functions declared there are marked as inline or constexpr (which implies # inline). This makes sure the xsimd headers does not define symbol with global # linkage, and somehow convey our itnent to have all functions in xsimd being # inlined by the compiler. From 182aecf7612dd72e05456ba6f4b01cf4d4d5ef78 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 30 Jan 2026 14:54:33 +0100 Subject: [PATCH 05/12] Refactor bit utils --- include/xsimd/utils/bits.hpp | 40 +++++++++++++ include/xsimd/xsimd_cpu_features_x86.hpp | 71 +++++++++--------------- 2 files changed, 66 insertions(+), 45 deletions(-) create mode 100644 include/xsimd/utils/bits.hpp diff --git a/include/xsimd/utils/bits.hpp b/include/xsimd/utils/bits.hpp new file mode 100644 index 000000000..e3ad83e30 --- /dev/null +++ b/include/xsimd/utils/bits.hpp @@ -0,0 +1,40 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ***************************************************************************/ + +#ifndef XSIMD_CPUID_UTILS_HPP +#define XSIMD_CPUID_UTILS_HPP + +namespace xsimd +{ + namespace utils + { + template + constexpr I make_bit_mask(I bit) + { + return static_cast(I { 1 } << bit); + } + + template + constexpr I make_bit_mask(I bit, Args... bits) + { + return make_bit_mask(bit) | make_bit_mask(static_cast(bits)...); + } + + template + constexpr bool bit_is_set(I value) + { + constexpr I mask = make_bit_mask(static_cast(Bits)...); + return (value & mask) == mask; + } + } +} + +#endif diff --git a/include/xsimd/xsimd_cpu_features_x86.hpp b/include/xsimd/xsimd_cpu_features_x86.hpp index 99e6b6eb3..ba816d2cd 100644 --- a/include/xsimd/xsimd_cpu_features_x86.hpp +++ b/include/xsimd/xsimd_cpu_features_x86.hpp @@ -15,35 +15,16 @@ #include #include "./config/xsimd_config.hpp" +#include "./utils/bits.hpp" #if XSIMD_TARGET_X86 && defined(_MSC_VER) -// Contains the definition of __cpuidex -#include +#include // Contains the definition of __cpuidex #endif namespace xsimd { namespace detail { - template - constexpr I make_bit_mask(I bit) - { - return static_cast(I { 1 } << bit); - } - - template - constexpr I make_bit_mask(I bit, Args... bits) - { - return make_bit_mask(bit) | make_bit_mask(static_cast(bits)...); - } - - template - constexpr bool bit_is_set(I value) - { - constexpr I mask = make_bit_mask(static_cast(Bits)...); - return (value & mask) == mask; - } - inline void get_cpuid(int reg[4], int level, int count = 0) noexcept; inline std::uint32_t get_xcr0_low() noexcept; @@ -83,20 +64,20 @@ namespace xsimd constexpr bool sse_state_os_enabled() const noexcept { - return detail::bit_is_set<1>(m_low); + return utils::bit_is_set<1>(m_low); } constexpr bool avx_state_os_enabled() const noexcept { // Check both SSE and AVX bits even though AVX must imply SSE - return detail::bit_is_set<1, 2>(m_low); + return utils::bit_is_set<1, 2>(m_low); } constexpr bool avx512_state_os_enabled() const noexcept { // Check all SSE, AVX, and AVX52 bits even though AVX512 must // imply AVX and SSE - return detail::bit_is_set<1, 2, 6>(m_low); + return utils::bit_is_set<1, 2, 6>(m_low); } private: @@ -141,32 +122,32 @@ namespace xsimd constexpr bool sse2() const noexcept { - return detail::bit_is_set<26>(m_regs.reg1[3]); + return utils::bit_is_set<26>(m_regs.reg1[3]); } constexpr bool sse3() const noexcept { - return detail::bit_is_set<0>(m_regs.reg1[2]); + return utils::bit_is_set<0>(m_regs.reg1[2]); } constexpr bool ssse3() const noexcept { - return detail::bit_is_set<9>(m_regs.reg1[2]); + return utils::bit_is_set<9>(m_regs.reg1[2]); } constexpr bool sse4_1() const noexcept { - return detail::bit_is_set<19>(m_regs.reg1[2]); + return utils::bit_is_set<19>(m_regs.reg1[2]); } constexpr bool sse4_2() const noexcept { - return detail::bit_is_set<20>(m_regs.reg1[2]); + return utils::bit_is_set<20>(m_regs.reg1[2]); } constexpr bool fma3() const noexcept { - return detail::bit_is_set<12>(m_regs.reg1[2]); + return utils::bit_is_set<12>(m_regs.reg1[2]); } /** @@ -181,77 +162,77 @@ namespace xsimd */ constexpr bool osxsave() const noexcept { - return detail::bit_is_set<27>(m_regs.reg1[2]); + return utils::bit_is_set<27>(m_regs.reg1[2]); } constexpr bool avx() const noexcept { - return detail::bit_is_set<28>(m_regs.reg1[2]); + return utils::bit_is_set<28>(m_regs.reg1[2]); } constexpr bool avx2() const noexcept { - return detail::bit_is_set<5>(m_regs.reg7[1]); + return utils::bit_is_set<5>(m_regs.reg7[1]); } constexpr bool avx512f() const noexcept { - return detail::bit_is_set<16>(m_regs.reg7[1]); + return utils::bit_is_set<16>(m_regs.reg7[1]); } constexpr bool avx512dq() const noexcept { - return detail::bit_is_set<17>(m_regs.reg7[1]); + return utils::bit_is_set<17>(m_regs.reg7[1]); } constexpr bool avx512ifma() const noexcept { - return detail::bit_is_set<21>(m_regs.reg7[1]); + return utils::bit_is_set<21>(m_regs.reg7[1]); } constexpr bool avx512pf() const noexcept { - return detail::bit_is_set<26>(m_regs.reg7[1]); + return utils::bit_is_set<26>(m_regs.reg7[1]); } constexpr bool avx512er() const noexcept { - return detail::bit_is_set<27>(m_regs.reg7[1]); + return utils::bit_is_set<27>(m_regs.reg7[1]); } constexpr bool avx512cd() const noexcept { - return detail::bit_is_set<28>(m_regs.reg7[1]); + return utils::bit_is_set<28>(m_regs.reg7[1]); } constexpr bool avx512bw() const noexcept { - return detail::bit_is_set<30>(m_regs.reg7[1]); + return utils::bit_is_set<30>(m_regs.reg7[1]); } constexpr bool avx512vbmi() const noexcept { - return detail::bit_is_set<1>(m_regs.reg7[2]); + return utils::bit_is_set<1>(m_regs.reg7[2]); } constexpr bool avx512vbmi2() const noexcept { - return detail::bit_is_set<6>(m_regs.reg7[2]); + return utils::bit_is_set<6>(m_regs.reg7[2]); } constexpr bool avx512vnni_bw() const noexcept { - return detail::bit_is_set<11>(m_regs.reg7[2]); + return utils::bit_is_set<11>(m_regs.reg7[2]); } constexpr bool avxvnni() const noexcept { - return detail::bit_is_set<4>(m_regs.reg7a[0]); + return utils::bit_is_set<4>(m_regs.reg7a[0]); } constexpr bool fma4() const noexcept { - return detail::bit_is_set<16>(m_regs.reg8[2]); + return utils::bit_is_set<16>(m_regs.reg8[2]); } private: From 1b023aae1f5a254e3c73ca4e105ac13f46a01840 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 30 Jan 2026 17:32:56 +0100 Subject: [PATCH 06/12] Fix inline header in wrong location --- include/xsimd/types/xsimd_all_registers.hpp | 2 -- include/xsimd/types/xsimd_register.hpp | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/xsimd/types/xsimd_all_registers.hpp b/include/xsimd/types/xsimd_all_registers.hpp index da87df2e4..33f9b465d 100644 --- a/include/xsimd/types/xsimd_all_registers.hpp +++ b/include/xsimd/types/xsimd_all_registers.hpp @@ -9,8 +9,6 @@ * The full license is in the file LICENSE, distributed with this software. * ****************************************************************************/ -#include "../config/xsimd_inline.hpp" - #include "xsimd_fma3_sse_register.hpp" #include "xsimd_fma4_register.hpp" #include "xsimd_sse2_register.hpp" diff --git a/include/xsimd/types/xsimd_register.hpp b/include/xsimd/types/xsimd_register.hpp index 018418af6..b14962e5b 100644 --- a/include/xsimd/types/xsimd_register.hpp +++ b/include/xsimd/types/xsimd_register.hpp @@ -14,6 +14,8 @@ #include +#include "../config/xsimd_inline.hpp" + namespace xsimd { namespace types From f472db6e6f3df7130a3d82101a4d815a325ac1c5 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 30 Jan 2026 17:43:44 +0100 Subject: [PATCH 07/12] Fix cpuid aliases --- include/xsimd/xsimd_cpu_features_x86.hpp | 36 ++++++++++++------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/include/xsimd/xsimd_cpu_features_x86.hpp b/include/xsimd/xsimd_cpu_features_x86.hpp index ba816d2cd..ec579aaec 100644 --- a/include/xsimd/xsimd_cpu_features_x86.hpp +++ b/include/xsimd/xsimd_cpu_features_x86.hpp @@ -12,6 +12,7 @@ #ifndef XSIMD_CPU_FEATURES_X86_HPP #define XSIMD_CPU_FEATURES_X86_HPP +#include #include #include "./config/xsimd_config.hpp" @@ -25,9 +26,11 @@ namespace xsimd { namespace detail { - inline void get_cpuid(int reg[4], int level, int count = 0) noexcept; + using cpuid_reg_t = std::array; + inline cpuid_reg_t get_cpuid(int level, int count = 0) noexcept; - inline std::uint32_t get_xcr0_low() noexcept; + using xcr0_reg_t = std::uint32_t; + inline xcr0_reg_t get_xcr0_low() noexcept; } /* @@ -42,7 +45,7 @@ namespace xsimd class x86_xcr0 { public: - using reg_t = std::uint32_t; + using reg_t = detail::xcr0_reg_t; /** Parse a XCR0 value into individual components. */ constexpr explicit x86_xcr0(reg_t low) noexcept @@ -81,7 +84,7 @@ namespace xsimd } private: - std::uint32_t m_low = {}; + reg_t m_low = {}; }; class x86_cpu_id @@ -89,7 +92,7 @@ namespace xsimd public: struct cpu_id_regs { - using reg_t = int[4]; + using reg_t = detail::cpuid_reg_t; reg_t reg1 = {}; reg_t reg7 = {}; @@ -113,10 +116,10 @@ namespace xsimd inline static x86_cpu_id read() { cpu_id_regs regs = {}; - detail::get_cpuid(regs.reg1, 0x1); - detail::get_cpuid(regs.reg7, 0x7); - detail::get_cpuid(regs.reg7a, 0x7, 0x1); - detail::get_cpuid(regs.reg8, 0x80000001); + regs.reg1 = detail::get_cpuid(0x1); + regs.reg7 = detail::get_cpuid(0x7); + regs.reg7a = detail::get_cpuid(0x7, 0x1); + regs.reg8 = detail::get_cpuid(0x80000001); return x86_cpu_id(regs); } @@ -241,15 +244,12 @@ namespace xsimd namespace detail { - inline void get_cpuid(int reg[4], int level, int count) noexcept + inline cpuid_reg_t get_cpuid(int level, int count) noexcept { #if !XSIMD_TARGET_X86 - reg[0] = 0; - reg[1] = 0; - reg[2] = 0; - reg[3] = 0; (void)level; (void)count; + return {}; // All bits to zero #elif defined(_MSC_VER) __cpuidex(reg, level, count); @@ -275,16 +275,16 @@ namespace xsimd #endif } - inline std::uint32_t get_xcr0_low() noexcept + inline xcr0_reg_t get_xcr0_low() noexcept { #if !XSIMD_TARGET_X86 - return {}; // return 0; + return {}; // All bits to zero #elif defined(_MSC_VER) && _MSC_VER >= 1400 - return static_cast(_xgetbv(0)); + return static_cast(_xgetbv(0)); #elif defined(__GNUC__) - std::uint32_t xcr0 = {}; + xcr0_reg_t xcr0 = {}; __asm__( "xorl %%ecx, %%ecx\n" "xgetbv\n" From a91759c9978fed8070cd16a5f4630b092ba6be40 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 30 Jan 2026 17:53:01 +0100 Subject: [PATCH 08/12] Add future TODO --- include/xsimd/utils/bits.hpp | 1 + include/xsimd/xsimd_cpu_features_x86.hpp | 1 + 2 files changed, 2 insertions(+) diff --git a/include/xsimd/utils/bits.hpp b/include/xsimd/utils/bits.hpp index e3ad83e30..a8a862219 100644 --- a/include/xsimd/utils/bits.hpp +++ b/include/xsimd/utils/bits.hpp @@ -25,6 +25,7 @@ namespace xsimd template constexpr I make_bit_mask(I bit, Args... bits) { + // TODO(C++17): Use fold expression return make_bit_mask(bit) | make_bit_mask(static_cast(bits)...); } diff --git a/include/xsimd/xsimd_cpu_features_x86.hpp b/include/xsimd/xsimd_cpu_features_x86.hpp index ec579aaec..7af58fcd0 100644 --- a/include/xsimd/xsimd_cpu_features_x86.hpp +++ b/include/xsimd/xsimd_cpu_features_x86.hpp @@ -116,6 +116,7 @@ namespace xsimd inline static x86_cpu_id read() { cpu_id_regs regs = {}; + // TODO(C++20): Use designated initializer regs.reg1 = detail::get_cpuid(0x1); regs.reg7 = detail::get_cpuid(0x7); regs.reg7a = detail::get_cpuid(0x7, 0x1); From f2c54032e2b555f4ae3e9fc1401bf6d663191257 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 30 Jan 2026 17:55:51 +0100 Subject: [PATCH 09/12] Fix missing var --- include/xsimd/xsimd_cpu_features_x86.hpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/include/xsimd/xsimd_cpu_features_x86.hpp b/include/xsimd/xsimd_cpu_features_x86.hpp index 7af58fcd0..cad9e2580 100644 --- a/include/xsimd/xsimd_cpu_features_x86.hpp +++ b/include/xsimd/xsimd_cpu_features_x86.hpp @@ -78,7 +78,7 @@ namespace xsimd constexpr bool avx512_state_os_enabled() const noexcept { - // Check all SSE, AVX, and AVX52 bits even though AVX512 must + // Check all SSE, AVX, and AVX512 bits even though AVX512 must // imply AVX and SSE return utils::bit_is_set<1, 2, 6>(m_low); } @@ -247,16 +247,18 @@ namespace xsimd { inline cpuid_reg_t get_cpuid(int level, int count) noexcept { + cpuid_reg_t reg = {}; + #if !XSIMD_TARGET_X86 (void)level; (void)count; return {}; // All bits to zero #elif defined(_MSC_VER) - __cpuidex(reg, level, count); + __cpuidex(reg.data(), level, count); #elif defined(__INTEL_COMPILER) - __cpuid(reg, level); + __cpuid(reg.data(), level); #elif defined(__GNUC__) || defined(__clang__) @@ -274,6 +276,7 @@ namespace xsimd : "0"(level), "2"(count)); #endif #endif + return reg; } inline xcr0_reg_t get_xcr0_low() noexcept From 69e3b3e09c2ccc7c4fab630cc7bd731abf3a2300 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Mon, 2 Feb 2026 16:18:27 +0100 Subject: [PATCH 10/12] Com[act lines --- include/xsimd/xsimd_cpu_features_x86.hpp | 105 +++++------------------ 1 file changed, 21 insertions(+), 84 deletions(-) diff --git a/include/xsimd/xsimd_cpu_features_x86.hpp b/include/xsimd/xsimd_cpu_features_x86.hpp index cad9e2580..b725d93c4 100644 --- a/include/xsimd/xsimd_cpu_features_x86.hpp +++ b/include/xsimd/xsimd_cpu_features_x86.hpp @@ -124,35 +124,17 @@ namespace xsimd return x86_cpu_id(regs); } - constexpr bool sse2() const noexcept - { - return utils::bit_is_set<26>(m_regs.reg1[3]); - } + constexpr bool sse2() const noexcept { return utils::bit_is_set<26>(m_regs.reg1[3]); } - constexpr bool sse3() const noexcept - { - return utils::bit_is_set<0>(m_regs.reg1[2]); - } + constexpr bool sse3() const noexcept { return utils::bit_is_set<0>(m_regs.reg1[2]); } - constexpr bool ssse3() const noexcept - { - return utils::bit_is_set<9>(m_regs.reg1[2]); - } + constexpr bool ssse3() const noexcept { return utils::bit_is_set<9>(m_regs.reg1[2]); } - constexpr bool sse4_1() const noexcept - { - return utils::bit_is_set<19>(m_regs.reg1[2]); - } + constexpr bool sse4_1() const noexcept { return utils::bit_is_set<19>(m_regs.reg1[2]); } - constexpr bool sse4_2() const noexcept - { - return utils::bit_is_set<20>(m_regs.reg1[2]); - } + constexpr bool sse4_2() const noexcept { return utils::bit_is_set<20>(m_regs.reg1[2]); } - constexpr bool fma3() const noexcept - { - return utils::bit_is_set<12>(m_regs.reg1[2]); - } + constexpr bool fma3() const noexcept { return utils::bit_is_set<12>(m_regs.reg1[2]); } /** * Indicates whether the OS has enabled extended state management. @@ -164,80 +146,35 @@ namespace xsimd * This value is read from CPUID leaf 0x1, ECX bit 27, which reflects * the state of CR4.OSXSAVE. */ - constexpr bool osxsave() const noexcept - { - return utils::bit_is_set<27>(m_regs.reg1[2]); - } + constexpr bool osxsave() const noexcept { return utils::bit_is_set<27>(m_regs.reg1[2]); } - constexpr bool avx() const noexcept - { - return utils::bit_is_set<28>(m_regs.reg1[2]); - } + constexpr bool avx() const noexcept { return utils::bit_is_set<28>(m_regs.reg1[2]); } - constexpr bool avx2() const noexcept - { - return utils::bit_is_set<5>(m_regs.reg7[1]); - } + constexpr bool avx2() const noexcept { return utils::bit_is_set<5>(m_regs.reg7[1]); } - constexpr bool avx512f() const noexcept - { - return utils::bit_is_set<16>(m_regs.reg7[1]); - } + constexpr bool avx512f() const noexcept { return utils::bit_is_set<16>(m_regs.reg7[1]); } - constexpr bool avx512dq() const noexcept - { - return utils::bit_is_set<17>(m_regs.reg7[1]); - } + constexpr bool avx512dq() const noexcept { return utils::bit_is_set<17>(m_regs.reg7[1]); } - constexpr bool avx512ifma() const noexcept - { - return utils::bit_is_set<21>(m_regs.reg7[1]); - } + constexpr bool avx512ifma() const noexcept { return utils::bit_is_set<21>(m_regs.reg7[1]); } - constexpr bool avx512pf() const noexcept - { - return utils::bit_is_set<26>(m_regs.reg7[1]); - } + constexpr bool avx512pf() const noexcept { return utils::bit_is_set<26>(m_regs.reg7[1]); } - constexpr bool avx512er() const noexcept - { - return utils::bit_is_set<27>(m_regs.reg7[1]); - } + constexpr bool avx512er() const noexcept { return utils::bit_is_set<27>(m_regs.reg7[1]); } - constexpr bool avx512cd() const noexcept - { - return utils::bit_is_set<28>(m_regs.reg7[1]); - } + constexpr bool avx512cd() const noexcept { return utils::bit_is_set<28>(m_regs.reg7[1]); } - constexpr bool avx512bw() const noexcept - { - return utils::bit_is_set<30>(m_regs.reg7[1]); - } + constexpr bool avx512bw() const noexcept { return utils::bit_is_set<30>(m_regs.reg7[1]); } - constexpr bool avx512vbmi() const noexcept - { - return utils::bit_is_set<1>(m_regs.reg7[2]); - } + constexpr bool avx512vbmi() const noexcept { return utils::bit_is_set<1>(m_regs.reg7[2]); } - constexpr bool avx512vbmi2() const noexcept - { - return utils::bit_is_set<6>(m_regs.reg7[2]); - } + constexpr bool avx512vbmi2() const noexcept { return utils::bit_is_set<6>(m_regs.reg7[2]); } - constexpr bool avx512vnni_bw() const noexcept - { - return utils::bit_is_set<11>(m_regs.reg7[2]); - } + constexpr bool avx512vnni_bw() const noexcept { return utils::bit_is_set<11>(m_regs.reg7[2]); } - constexpr bool avxvnni() const noexcept - { - return utils::bit_is_set<4>(m_regs.reg7a[0]); - } + constexpr bool avxvnni() const noexcept { return utils::bit_is_set<4>(m_regs.reg7a[0]); } - constexpr bool fma4() const noexcept - { - return utils::bit_is_set<16>(m_regs.reg8[2]); - } + constexpr bool fma4() const noexcept { return utils::bit_is_set<16>(m_regs.reg8[2]); } private: cpu_id_regs m_regs = {}; From 09e7b1ec811c5e801e777f2fc0a263f27c9cc338 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Mon, 2 Feb 2026 16:24:20 +0100 Subject: [PATCH 11/12] Add doc --- include/xsimd/xsimd_cpu_features_x86.hpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/xsimd/xsimd_cpu_features_x86.hpp b/include/xsimd/xsimd_cpu_features_x86.hpp index b725d93c4..eb691cf57 100644 --- a/include/xsimd/xsimd_cpu_features_x86.hpp +++ b/include/xsimd/xsimd_cpu_features_x86.hpp @@ -87,6 +87,15 @@ namespace xsimd reg_t m_low = {}; }; + /** + * CPU Identification (CPUID) instruction results. + * + * The CPUID instruction provides detailed information about the processor, + * including supported instruction set extensions (SSE, AVX, AVX-512, etc.). + * This utility parses CPUID leaf values to detect available CPU features. + * + * @see https://en.wikipedia.org/wiki/CPUID + */ class x86_cpu_id { public: From 8688967d45ab730d43506f2547d1f866d4cb34a0 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Mon, 2 Feb 2026 16:32:05 +0100 Subject: [PATCH 12/12] Shorter name --- include/xsimd/config/xsimd_cpuid.hpp | 52 ++++++++++++------------ include/xsimd/xsimd_cpu_features_x86.hpp | 6 +-- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp index c64bfa6f8..b0f1ac47b 100644 --- a/include/xsimd/config/xsimd_cpuid.hpp +++ b/include/xsimd/config/xsimd_cpuid.hpp @@ -118,49 +118,49 @@ namespace xsimd const auto cpuid = xsimd::x86_cpu_id::read(); auto xcr0 = xsimd::x86_xcr0::make_false(); - bool sse_state_os_enabled = true; + bool sse_enabled = true; // AVX and AVX512 strictly require OSXSAVE to be enabled by the OS. // If OSXSAVE is disabled (e.g., via bcdedit /set xsavedisable 1), // AVX state won't be preserved across context switches, so AVX cannot be used. - bool avx_state_os_enabled = false; - bool avx512_state_os_enabled = false; + bool avx_enabled = false; + bool avx512_enabled = false; if (cpuid.osxsave()) { xcr0 = xsimd::x86_xcr0::read(); - sse_state_os_enabled = xcr0.sse_state_os_enabled(); - avx_state_os_enabled = xcr0.avx_state_os_enabled(); - avx512_state_os_enabled = xcr0.avx512_state_os_enabled(); + sse_enabled = xcr0.sse_enabled(); + avx_enabled = xcr0.avx_enabled(); + avx512_enabled = xcr0.avx512_enabled(); } - sse2 = cpuid.sse2() && sse_state_os_enabled; - sse3 = cpuid.sse3() && sse_state_os_enabled; - ssse3 = cpuid.ssse3() && sse_state_os_enabled; - sse4_1 = cpuid.sse4_1() && sse_state_os_enabled; - sse4_2 = cpuid.sse4_2() && sse_state_os_enabled; - fma3_sse42 = cpuid.fma3() && sse_state_os_enabled; + sse2 = cpuid.sse2() && sse_enabled; + sse3 = cpuid.sse3() && sse_enabled; + ssse3 = cpuid.ssse3() && sse_enabled; + sse4_1 = cpuid.sse4_1() && sse_enabled; + sse4_2 = cpuid.sse4_2() && sse_enabled; + fma3_sse42 = cpuid.fma3() && sse_enabled; // sse4a not implemented in cpu_id yet // xop not implemented in cpu_id yet - avx = cpuid.avx() && avx_state_os_enabled; + avx = cpuid.avx() && avx_enabled; fma3_avx = avx && fma3_sse42; - fma4 = cpuid.fma4() && avx_state_os_enabled; - avx2 = cpuid.avx2() && avx_state_os_enabled; - avxvnni = cpuid.avxvnni() && avx_state_os_enabled; + fma4 = cpuid.fma4() && avx_enabled; + avx2 = cpuid.avx2() && avx_enabled; + avxvnni = cpuid.avxvnni() && avx_enabled; fma3_avx2 = avx2 && fma3_sse42; - avx512f = cpuid.avx512f() && avx512_state_os_enabled; - avx512cd = cpuid.avx512cd() && avx512_state_os_enabled; - avx512dq = cpuid.avx512dq() && avx512_state_os_enabled; - avx512bw = cpuid.avx512bw() && avx512_state_os_enabled; - avx512er = cpuid.avx512er() && avx512_state_os_enabled; - avx512pf = cpuid.avx512pf() && avx512_state_os_enabled; - avx512ifma = cpuid.avx512ifma() && avx512_state_os_enabled; - avx512vbmi = cpuid.avx512vbmi() && avx512_state_os_enabled; - avx512vbmi2 = cpuid.avx512vbmi2() && avx512_state_os_enabled; - avx512vnni_bw = cpuid.avx512vnni_bw() && avx512_state_os_enabled; + avx512f = cpuid.avx512f() && avx512_enabled; + avx512cd = cpuid.avx512cd() && avx512_enabled; + avx512dq = cpuid.avx512dq() && avx512_enabled; + avx512bw = cpuid.avx512bw() && avx512_enabled; + avx512er = cpuid.avx512er() && avx512_enabled; + avx512pf = cpuid.avx512pf() && avx512_enabled; + avx512ifma = cpuid.avx512ifma() && avx512_enabled; + avx512vbmi = cpuid.avx512vbmi() && avx512_enabled; + avx512vbmi2 = cpuid.avx512vbmi2() && avx512_enabled; + avx512vnni_bw = cpuid.avx512vnni_bw() && avx512_enabled; avx512vnni_vbmi2 = avx512vbmi2 && avx512vnni_bw; } }; diff --git a/include/xsimd/xsimd_cpu_features_x86.hpp b/include/xsimd/xsimd_cpu_features_x86.hpp index eb691cf57..7837d92d8 100644 --- a/include/xsimd/xsimd_cpu_features_x86.hpp +++ b/include/xsimd/xsimd_cpu_features_x86.hpp @@ -65,18 +65,18 @@ namespace xsimd return x86_xcr0(detail::get_xcr0_low()); } - constexpr bool sse_state_os_enabled() const noexcept + constexpr bool sse_enabled() const noexcept { return utils::bit_is_set<1>(m_low); } - constexpr bool avx_state_os_enabled() const noexcept + constexpr bool avx_enabled() const noexcept { // Check both SSE and AVX bits even though AVX must imply SSE return utils::bit_is_set<1, 2>(m_low); } - constexpr bool avx512_state_os_enabled() const noexcept + constexpr bool avx512_enabled() const noexcept { // Check all SSE, AVX, and AVX512 bits even though AVX512 must // imply AVX and SSE