From ef662f6f179fe518567a44c6a2815204d4f84024 Mon Sep 17 00:00:00 2001 From: b-pass Date: Tue, 23 Dec 2025 23:17:06 -0500 Subject: [PATCH 01/35] Make argument_vector re-usable for other types. --- include/pybind11/detail/argument_vector.h | 141 ++++++++++++---------- 1 file changed, 79 insertions(+), 62 deletions(-) diff --git a/include/pybind11/detail/argument_vector.h b/include/pybind11/detail/argument_vector.h index e15a3cfabc..c170b58483 100644 --- a/include/pybind11/detail/argument_vector.h +++ b/include/pybind11/detail/argument_vector.h @@ -63,27 +63,50 @@ union inline_array_or_vector { heap_vector(std::size_t count, VectorT value) : vec(count, value) {} }; + static_assert(std::is_nothrow_move_assignable_v, "this class is not exception safe"); + inline_array iarray; heap_vector hvector; - static_assert(std::is_trivially_move_constructible::value, - "ArrayT must be trivially move constructible"); - static_assert(std::is_trivially_destructible::value, - "ArrayT must be trivially destructible"); + bool is_inline() const { + // It is undefined behavior to access the inactive member of a + // union directly. However, it is well-defined to reinterpret_cast any + // pointer into a pointer to char and examine it as an array + // of bytes. See + // https://dev-discuss.pytorch.org/t/unionizing-for-profit-how-to-exploit-the-power-of-unions-in-c/444#the-memcpy-loophole-4 + bool result = false; + static_assert(offsetof(inline_array, is_inline) == 0, + "untagged union implementation relies on this"); + static_assert(offsetof(heap_vector, is_inline) == 0, + "untagged union implementation relies on this"); + std::memcpy(&result, reinterpret_cast(this), sizeof(bool)); + return result; + } - inline_array_or_vector() : iarray() {} - ~inline_array_or_vector() { + void destruct() { if (!is_inline()) { hvector.~heap_vector(); } + else if (!std::is_trivially_destructible::value) { + for (size_t i = 0; i < iarray.size; ++i) { + iarray.arr[i].~ArrayT(); + } + } } + + inline_array_or_vector() : iarray() {} + + ~inline_array_or_vector() { + destruct(); + } + // Disable copy ctor and assignment. inline_array_or_vector(const inline_array_or_vector &) = delete; inline_array_or_vector &operator=(const inline_array_or_vector &) = delete; inline_array_or_vector(inline_array_or_vector &&rhs) noexcept { if (rhs.is_inline()) { - std::memcpy(&iarray, &rhs.iarray, sizeof(iarray)); + new (&iarray) inline_array(std::move(rhs.iarray)); } else { new (&hvector) heap_vector(std::move(rhs.hvector)); } @@ -95,49 +118,28 @@ union inline_array_or_vector { return *this; } + destruct(); + if (rhs.is_inline()) { - if (!is_inline()) { - hvector.~heap_vector(); - } - std::memcpy(&iarray, &rhs.iarray, sizeof(iarray)); + new (&iarray) inline_array(std::move(rhs.iarray)); } else { - if (is_inline()) { - new (&hvector) heap_vector(std::move(rhs.hvector)); - } else { - hvector = std::move(rhs.hvector); - } + new (&hvector) heap_vector(std::move(rhs.hvector)); } return *this; } - bool is_inline() const { - // It is undefined behavior to access the inactive member of a - // union directly. However, it is well-defined to reinterpret_cast any - // pointer into a pointer to char and examine it as an array - // of bytes. See - // https://dev-discuss.pytorch.org/t/unionizing-for-profit-how-to-exploit-the-power-of-unions-in-c/444#the-memcpy-loophole-4 - bool result = false; - static_assert(offsetof(inline_array, is_inline) == 0, - "untagged union implementation relies on this"); - static_assert(offsetof(heap_vector, is_inline) == 0, - "untagged union implementation relies on this"); - std::memcpy(&result, reinterpret_cast(this), sizeof(bool)); - return result; - } }; -// small_vector-like container to avoid heap allocation for N or fewer -// arguments. -template -struct argument_vector { +template +struct small_vector { public: - argument_vector() = default; + small_vector() = default; // Disable copy ctor and assignment. - argument_vector(const argument_vector &) = delete; - argument_vector &operator=(const argument_vector &) = delete; - argument_vector(argument_vector &&) noexcept = default; - argument_vector &operator=(argument_vector &&) noexcept = default; + small_vector(const small_vector &) = delete; + small_vector &operator=(const small_vector &) = delete; + small_vector(small_vector &&) noexcept = default; + small_vector &operator=(small_vector &&) noexcept = default; std::size_t size() const { if (is_inline()) { @@ -146,7 +148,14 @@ struct argument_vector { return m_repr.hvector.vec.size(); } - handle &operator[](std::size_t idx) { + T const *data() const { + if (is_inline()) { + return &m_repr.iarray.arr[0]; + } + return m_repr.hvector.vec.data(); + } + + T &operator[](std::size_t idx) { assert(idx < size()); if (is_inline()) { return m_repr.iarray.arr[idx]; @@ -154,7 +163,7 @@ struct argument_vector { return m_repr.hvector.vec[idx]; } - handle operator[](std::size_t idx) const { + T operator[](std::size_t idx) const { assert(idx < size()); if (is_inline()) { return m_repr.iarray.arr[idx]; @@ -162,28 +171,28 @@ struct argument_vector { return m_repr.hvector.vec[idx]; } - void push_back(handle x) { + void push_back(T x) { if (is_inline()) { auto &ha = m_repr.iarray; - if (ha.size == N) { - move_to_heap_vector_with_reserved_size(N + 1); - push_back_slow_path(x); + if (ha.size == InlineSize) { + move_to_heap_vector_with_reserved_size(InlineSize + 1); + push_back_slow_path(std::move(x)); } else { - ha.arr[ha.size++] = x; + ha.arr[ha.size++] = std::move(x); } } else { - push_back_slow_path(x); + push_back_slow_path(std::move(x)); } } template void emplace_back(Arg &&x) { - push_back(handle(x)); + push_back(T(x)); } void reserve(std::size_t sz) { if (is_inline()) { - if (sz > N) { + if (sz > InlineSize) { move_to_heap_vector_with_reserved_size(sz); } } else { @@ -192,7 +201,7 @@ struct argument_vector { } private: - using repr_type = inline_array_or_vector; + using repr_type = inline_array_or_vector; repr_type m_repr; PYBIND11_NOINLINE void move_to_heap_vector_with_reserved_size(std::size_t reserved_size) { @@ -201,32 +210,32 @@ struct argument_vector { using heap_vector = typename repr_type::heap_vector; heap_vector hv; hv.vec.reserve(reserved_size); - std::copy(ha.arr.begin(), ha.arr.begin() + ha.size, std::back_inserter(hv.vec)); + static_assert(std::is_nothrow_move_assignable_v, "this class is not exception safe"); + std::move(ha.arr.begin(), ha.arr.begin() + ha.size, std::back_inserter(hv.vec)); new (&m_repr.hvector) heap_vector(std::move(hv)); } - PYBIND11_NOINLINE void push_back_slow_path(handle x) { m_repr.hvector.vec.push_back(x); } + PYBIND11_NOINLINE void push_back_slow_path(T x) { m_repr.hvector.vec.push_back(std::move(x)); } PYBIND11_NOINLINE void reserve_slow_path(std::size_t sz) { m_repr.hvector.vec.reserve(sz); } bool is_inline() const { return m_repr.is_inline(); } }; -// small_vector-like container to avoid heap allocation for N or fewer -// arguments. +// Container to avoid heap allocation for kRequestedInlineSize or fewer booleans. template -struct args_convert_vector { +struct small_vector { private: public: - args_convert_vector() = default; + small_vector() = default; // Disable copy ctor and assignment. - args_convert_vector(const args_convert_vector &) = delete; - args_convert_vector &operator=(const args_convert_vector &) = delete; - args_convert_vector(args_convert_vector &&) noexcept = default; - args_convert_vector &operator=(args_convert_vector &&) noexcept = default; + small_vector(const small_vector &) = delete; + small_vector &operator=(const small_vector &) = delete; + small_vector(small_vector &&) noexcept = default; + small_vector &operator=(small_vector &&) noexcept = default; - args_convert_vector(std::size_t count, bool value) { + small_vector(std::size_t count, bool value) { if (count > kInlineSize) { new (&m_repr.hvector) typename repr_type::heap_vector(count, value); } else { @@ -284,7 +293,7 @@ struct args_convert_vector { } } - void swap(args_convert_vector &rhs) noexcept { std::swap(m_repr, rhs.m_repr); } + void swap(small_vector &rhs) noexcept { std::swap(m_repr, rhs.m_repr); } private: struct WordAndBitIndex { @@ -326,5 +335,13 @@ struct args_convert_vector { bool is_inline() const { return m_repr.is_inline(); } }; +// Container to avoid heap allocation for N or fewer arguments. +template +using argument_vector = small_vector; + +// Container to avoid heap allocation for N or fewer booleans. +template +using args_convert_vector = small_vector; + PYBIND11_NAMESPACE_END(detail) PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) From 418a034195c5c8b517e7404686555c097efe7a4b Mon Sep 17 00:00:00 2001 From: b-pass Date: Tue, 23 Dec 2025 23:17:31 -0500 Subject: [PATCH 02/35] Attempt to collect args into array for vectorcall --- include/pybind11/cast.h | 120 ++++++++++++++++++++++++++++------------ 1 file changed, 86 insertions(+), 34 deletions(-) diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index 310b77b342..5f258411c2 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -2198,24 +2198,50 @@ class simple_collector { public: template explicit simple_collector(Ts &&...values) - : m_args(pybind11::make_tuple(std::forward(values)...)) {} - - const tuple &args() const & { return m_args; } - dict kwargs() const { return {}; } + { + m_args.reserve(sizeof...(values)+1); + m_args.push_back(object()); // dummy first argument so we can use PY_VECTORCALL_ARGUMENTS_OFFSET - tuple args() && { return std::move(m_args); } + using expander = int[]; + (void) expander{0, (process(std::forward(values)), 0)...}; + } /// Call a Python function and pass the collected arguments object call(PyObject *ptr) const { - PyObject *result = PyObject_CallObject(ptr, m_args.ptr()); + static_assert(sizeof(m_args[0]) == sizeof(PyObject*), "pybind11::object must be castable to PyObject* for this to work"); + PyObject *result = PyObject_Vectorcall(ptr, reinterpret_cast(m_args.data()+1), (m_args.size()-1)|PY_VECTORCALL_ARGUMENTS_OFFSET, nullptr); if (!result) { throw error_already_set(); } return reinterpret_steal(result); } + tuple args() const { + tuple val(m_args.size() - 1); + for (size_t i = 1; i < m_args.size(); ++i) + val[i-1] = m_args[i]; + return val; + } + + dict kwargs() const { return {}; } + private: - tuple m_args; + template + void process(T &&x) { + auto o = reinterpret_steal( + detail::make_caster::cast(std::forward(x), policy, {})); + if (!o) { +#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) + throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size())); +#else + throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size()), + type_id()); +#endif + } + m_args.push_back(std::move(o)); + } + + small_vector m_args; }; /// Helper class which collects positional, keyword, * and ** arguments for a Python function call @@ -2224,53 +2250,73 @@ class unpacking_collector { public: template explicit unpacking_collector(Ts &&...values) { - // Tuples aren't (easily) resizable so a list is needed for collection, - // but the actual function call strictly requires a tuple. - auto args_list = list(); + auto names_list = list(); + m_args.reserve(sizeof...(values) + 1); + m_args.push_back(object()); // dummy first argument so we can use PY_VECTORCALL_ARGUMENTS_OFFSET + using expander = int[]; - (void) expander{0, (process(args_list, std::forward(values)), 0)...}; + (void) expander{0, (process(names_list, std::forward(values)), 0)...}; - m_args = std::move(args_list); + m_names = tuple(names_list.size()); + for (size_t i = 0; i < names_list.size(); ++i) + m_names[i] = std::move(names_list[i]); } - const tuple &args() const & { return m_args; } - const dict &kwargs() const & { return m_kwargs; } - - tuple args() && { return std::move(m_args); } - dict kwargs() && { return std::move(m_kwargs); } - /// Call a Python function and pass the collected arguments object call(PyObject *ptr) const { - PyObject *result = PyObject_Call(ptr, m_args.ptr(), m_kwargs.ptr()); + static_assert(sizeof(m_args[0]) == sizeof(PyObject*), "pybind11::object must be castable to PyObject* for this to work"); + PyObject *result = PyObject_Vectorcall(ptr, reinterpret_cast(m_args.data()+1), (m_args.size()-1)|PY_VECTORCALL_ARGUMENTS_OFFSET, m_names.ptr()); if (!result) { throw error_already_set(); } return reinterpret_steal(result); } + tuple args() const { + // -1 to account for PY_VECTORCALL_ARGUMENTS_OFFSET + size_t count = m_args.size() - 1 - m_names.size(); + tuple val(count); + for (size_t i = 0; i < count; ++i) { + // +1 to account for PY_VECTORCALL_ARGUMENTS_OFFSET + val[i] = m_args[i + 1]; + } + return val; + } + + dict kwargs() const { + size_t offset = m_args.size() - m_names.size(); + dict val; + for (size_t i = 0; i < m_names.size(); ++i, ++offset) { + val[m_names[i]] = m_args[offset]; + } + return val; + } + private: template - void process(list &args_list, T &&x) { + void process(list &/*names_list*/, T &&x) { auto o = reinterpret_steal( detail::make_caster::cast(std::forward(x), policy, {})); if (!o) { #if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) - throw cast_error_unable_to_convert_call_arg(std::to_string(args_list.size())); + throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size())); #else - throw cast_error_unable_to_convert_call_arg(std::to_string(args_list.size()), + throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size()), type_id()); #endif } - args_list.append(std::move(o)); + m_args.push_back(std::move(o)); + // collect_arguments() below guarantees that no keyword args have preceeded this arg. } - void process(list &args_list, detail::args_proxy ap) { + void process(list &/*names_list*/, detail::args_proxy ap) { for (auto a : ap) { - args_list.append(a); + m_args.push_back(reinterpret_borrow(a)); } + // collect_arguments() below guarantees that no keyword args have preceeded this arg. } - void process(list & /*args_list*/, arg_v a) { + void process(list &names_list, arg_v a) { if (!a.name) { #if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) nameless_argument_error(); @@ -2278,7 +2324,8 @@ class unpacking_collector { nameless_argument_error(a.type); #endif } - if (m_kwargs.contains(a.name)) { + auto name = str(a.name); + if (names_list.contains(name)) { #if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) multiple_values_error(); #else @@ -2292,23 +2339,28 @@ class unpacking_collector { throw cast_error_unable_to_convert_call_arg(a.name, a.type); #endif } - m_kwargs[a.name] = std::move(a.value); + names_list.append(std::move(name)); + m_args.push_back(std::move(a.value)); + // collect_arguments() below guarantees that no positional args will come after this arg } - void process(list & /*args_list*/, detail::kwargs_proxy kp) { + void process(list &names_list, detail::kwargs_proxy kp) { if (!kp) { return; } for (auto k : reinterpret_borrow(kp)) { - if (m_kwargs.contains(k.first)) { + auto name = str(k.first); + if (names_list.contains(name)) { #if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) multiple_values_error(); #else - multiple_values_error(str(k.first)); + multiple_values_error(name); #endif } - m_kwargs[k.first] = k.second; + names_list.append(std::move(name)); + m_args.push_back(reinterpret_borrow(k.second)); } + // collect_arguments() below guarantees that no positional args will come after these args } [[noreturn]] static void nameless_argument_error() { @@ -2333,8 +2385,8 @@ class unpacking_collector { } private: - tuple m_args; - dict m_kwargs; + small_vector m_args; + tuple m_names; }; // [workaround(intel)] Separate function required here From fc22c9750a8176b830fe92a7a27c9ceaeee4223d Mon Sep 17 00:00:00 2001 From: b-pass Date: Wed, 24 Dec 2025 20:23:13 -0500 Subject: [PATCH 03/35] Revert "Attempt to collect args into array for vectorcall" This reverts commit 418a034195c5c8b517e7404686555c097efe7a4b. --- include/pybind11/cast.h | 120 ++++++++++++---------------------------- 1 file changed, 34 insertions(+), 86 deletions(-) diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index 5f258411c2..310b77b342 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -2198,50 +2198,24 @@ class simple_collector { public: template explicit simple_collector(Ts &&...values) - { - m_args.reserve(sizeof...(values)+1); - m_args.push_back(object()); // dummy first argument so we can use PY_VECTORCALL_ARGUMENTS_OFFSET + : m_args(pybind11::make_tuple(std::forward(values)...)) {} - using expander = int[]; - (void) expander{0, (process(std::forward(values)), 0)...}; - } + const tuple &args() const & { return m_args; } + dict kwargs() const { return {}; } + + tuple args() && { return std::move(m_args); } /// Call a Python function and pass the collected arguments object call(PyObject *ptr) const { - static_assert(sizeof(m_args[0]) == sizeof(PyObject*), "pybind11::object must be castable to PyObject* for this to work"); - PyObject *result = PyObject_Vectorcall(ptr, reinterpret_cast(m_args.data()+1), (m_args.size()-1)|PY_VECTORCALL_ARGUMENTS_OFFSET, nullptr); + PyObject *result = PyObject_CallObject(ptr, m_args.ptr()); if (!result) { throw error_already_set(); } return reinterpret_steal(result); } - tuple args() const { - tuple val(m_args.size() - 1); - for (size_t i = 1; i < m_args.size(); ++i) - val[i-1] = m_args[i]; - return val; - } - - dict kwargs() const { return {}; } - private: - template - void process(T &&x) { - auto o = reinterpret_steal( - detail::make_caster::cast(std::forward(x), policy, {})); - if (!o) { -#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) - throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size())); -#else - throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size()), - type_id()); -#endif - } - m_args.push_back(std::move(o)); - } - - small_vector m_args; + tuple m_args; }; /// Helper class which collects positional, keyword, * and ** arguments for a Python function call @@ -2250,73 +2224,53 @@ class unpacking_collector { public: template explicit unpacking_collector(Ts &&...values) { - auto names_list = list(); - m_args.reserve(sizeof...(values) + 1); - m_args.push_back(object()); // dummy first argument so we can use PY_VECTORCALL_ARGUMENTS_OFFSET - + // Tuples aren't (easily) resizable so a list is needed for collection, + // but the actual function call strictly requires a tuple. + auto args_list = list(); using expander = int[]; - (void) expander{0, (process(names_list, std::forward(values)), 0)...}; + (void) expander{0, (process(args_list, std::forward(values)), 0)...}; - m_names = tuple(names_list.size()); - for (size_t i = 0; i < names_list.size(); ++i) - m_names[i] = std::move(names_list[i]); + m_args = std::move(args_list); } + const tuple &args() const & { return m_args; } + const dict &kwargs() const & { return m_kwargs; } + + tuple args() && { return std::move(m_args); } + dict kwargs() && { return std::move(m_kwargs); } + /// Call a Python function and pass the collected arguments object call(PyObject *ptr) const { - static_assert(sizeof(m_args[0]) == sizeof(PyObject*), "pybind11::object must be castable to PyObject* for this to work"); - PyObject *result = PyObject_Vectorcall(ptr, reinterpret_cast(m_args.data()+1), (m_args.size()-1)|PY_VECTORCALL_ARGUMENTS_OFFSET, m_names.ptr()); + PyObject *result = PyObject_Call(ptr, m_args.ptr(), m_kwargs.ptr()); if (!result) { throw error_already_set(); } return reinterpret_steal(result); } - tuple args() const { - // -1 to account for PY_VECTORCALL_ARGUMENTS_OFFSET - size_t count = m_args.size() - 1 - m_names.size(); - tuple val(count); - for (size_t i = 0; i < count; ++i) { - // +1 to account for PY_VECTORCALL_ARGUMENTS_OFFSET - val[i] = m_args[i + 1]; - } - return val; - } - - dict kwargs() const { - size_t offset = m_args.size() - m_names.size(); - dict val; - for (size_t i = 0; i < m_names.size(); ++i, ++offset) { - val[m_names[i]] = m_args[offset]; - } - return val; - } - private: template - void process(list &/*names_list*/, T &&x) { + void process(list &args_list, T &&x) { auto o = reinterpret_steal( detail::make_caster::cast(std::forward(x), policy, {})); if (!o) { #if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) - throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size())); + throw cast_error_unable_to_convert_call_arg(std::to_string(args_list.size())); #else - throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size()), + throw cast_error_unable_to_convert_call_arg(std::to_string(args_list.size()), type_id()); #endif } - m_args.push_back(std::move(o)); - // collect_arguments() below guarantees that no keyword args have preceeded this arg. + args_list.append(std::move(o)); } - void process(list &/*names_list*/, detail::args_proxy ap) { + void process(list &args_list, detail::args_proxy ap) { for (auto a : ap) { - m_args.push_back(reinterpret_borrow(a)); + args_list.append(a); } - // collect_arguments() below guarantees that no keyword args have preceeded this arg. } - void process(list &names_list, arg_v a) { + void process(list & /*args_list*/, arg_v a) { if (!a.name) { #if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) nameless_argument_error(); @@ -2324,8 +2278,7 @@ class unpacking_collector { nameless_argument_error(a.type); #endif } - auto name = str(a.name); - if (names_list.contains(name)) { + if (m_kwargs.contains(a.name)) { #if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) multiple_values_error(); #else @@ -2339,28 +2292,23 @@ class unpacking_collector { throw cast_error_unable_to_convert_call_arg(a.name, a.type); #endif } - names_list.append(std::move(name)); - m_args.push_back(std::move(a.value)); - // collect_arguments() below guarantees that no positional args will come after this arg + m_kwargs[a.name] = std::move(a.value); } - void process(list &names_list, detail::kwargs_proxy kp) { + void process(list & /*args_list*/, detail::kwargs_proxy kp) { if (!kp) { return; } for (auto k : reinterpret_borrow(kp)) { - auto name = str(k.first); - if (names_list.contains(name)) { + if (m_kwargs.contains(k.first)) { #if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) multiple_values_error(); #else - multiple_values_error(name); + multiple_values_error(str(k.first)); #endif } - names_list.append(std::move(name)); - m_args.push_back(reinterpret_borrow(k.second)); + m_kwargs[k.first] = k.second; } - // collect_arguments() below guarantees that no positional args will come after these args } [[noreturn]] static void nameless_argument_error() { @@ -2385,8 +2333,8 @@ class unpacking_collector { } private: - small_vector m_args; - tuple m_names; + tuple m_args; + dict m_kwargs; }; // [workaround(intel)] Separate function required here From 9894a5a7ee0d43c3d4906da5724abdf81de32c31 Mon Sep 17 00:00:00 2001 From: b-pass Date: Wed, 24 Dec 2025 22:12:25 -0500 Subject: [PATCH 04/35] Implement vectorcall args collector --- include/pybind11/cast.h | 212 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 212 insertions(+) diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index 310b77b342..02f952b1ea 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -2346,6 +2346,16 @@ constexpr bool args_are_all_positional() { return all_of...>::value; } +// [workaround(intel)] Separate function required here +// We need to put this into a separate function because the Intel compiler +// fails to compile enable_if_t...>::value> +// (tested with ICC 2021.1 Beta 20200827). +template +constexpr bool args_have_kw() { + return none_of...>::value; +} + +#if PY_VERSION_HEX < 0x030C0000 /// Collect only positional arguments for a Python function call template collect_arguments(Args &&...args) { "* unpacking must precede ** unpacking"); return unpacking_collector(std::forward(args)...); } +#else +/// Helper class which collects positional, keyword, * and ** arguments for a Python function call +template +class unpacking_vectorcall_collector { +public: + template + explicit unpacking_vectorcall_collector(Ts &&...values) { + + m_args.reserve(sizeof...(values) + 1); + m_args.push_back(nullptr); // dummy first argument so we can use PY_VECTORCALL_ARGUMENTS_OFFSET + + // collect_arguments guarantees this can't be constructed with kwargs before the last positional + // so we don't need to worry about Ts... being in anything but normal python order. + using expander = int[]; + if (args_have_kw()) { + (void) expander{0, (positional(std::forward(values)), 0)...}; + } + else { + list names_list; + + (void) expander{0, (process(names_list, std::forward(values)), 0)...}; + + if (!names_list.empty()) + m_names = reinterpret_steal(PyList_AsTuple(names_list.ptr())); + } + } + + /// Call a Python function and pass the collected arguments + object call(PyObject *ptr) const { + // -1 to account for PY_VECTORCALL_ARGUMENTS_OFFSET + size_t nargs = m_args.size() - 1; + if (m_names) { + nargs -= PyTuple_GET_SIZE(m_names.ptr()); + } + PyObject *result = PyObject_Vectorcall( + ptr, + m_args.data()+1, + nargs|PY_VECTORCALL_ARGUMENTS_OFFSET, + m_names.ptr() + ); + if (!result) { + throw error_already_set(); + } + return reinterpret_steal(result); + } + + tuple args() const { + // -1 to account for PY_VECTORCALL_ARGUMENTS_OFFSET + size_t nargs = m_args.size() - 1; + if (m_names) { + nargs -= PyTuple_GET_SIZE(m_names.ptr()); + } + tuple val(nargs); + for (size_t i = 0; i < nargs; ++i) { + // +1 to account for PY_VECTORCALL_ARGUMENTS_OFFSET + val[i] = reinterpret_borrow(m_args[i + 1]); + } + return val; + } + + dict kwargs() const { + dict val; + if (m_names) + { + tuple namestup(m_names); + size_t offset = m_args.size() - namestup.size(); + for (size_t i = 0; i < namestup.size(); ++i, ++offset) { + val[namestup[i]] = reinterpret_borrow(m_args[offset]); + } + } + return val; + } + +private: + // normal argument, possibly needing conversion + template + void positional(T &&x) { + auto o = reinterpret_steal( + detail::make_caster::cast(std::forward(x), policy, {})); + if (!o) { +#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) + throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size()-1)); +#else + throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size()-1), + type_id()); +#endif + } + m_args.push_back(o.ptr()); + m_temp.push_back(std::move(o)); + } + + void positional(detail::args_proxy ap) { + if (!ap) { + return; + } + for (auto a : ap) { + m_temp.push_back(reinterpret_borrow(a)); // keep alive + m_args.push_back(a.ptr()); + } + } + + // normal argument, possibly needing conversion + template + void process(list &/*names_list*/, T &&x) { + positional(std::forward(x)); + } + + // * unpacking + void process(list &/*names_list*/, detail::args_proxy ap) { + positional(ap); + } + + // named argument + void process(list &names_list, arg_v a) { + if (!a.name) { +#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) + nameless_argument_error(); +#else + nameless_argument_error(a.type); +#endif + } + auto name = str(a.name); + if (names_list.contains(name)) { +#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) + multiple_values_error(); +#else + multiple_values_error(a.name); +#endif + } + if (!a.value) { +#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) + throw cast_error_unable_to_convert_call_arg(a.name); +#else + throw cast_error_unable_to_convert_call_arg(a.name, a.type); +#endif + } + names_list.append(std::move(name)); + m_temp.push_back(a.value); // keep alive + m_args.push_back(a.value.ptr()); + } + + // ** unpacking + void process(list &names_list, detail::kwargs_proxy kp) { + if (!kp) { + return; + } + for (auto&& k : reinterpret_borrow(kp)) { + auto name = str(k.first); + if (names_list.contains(name)) { +#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) + multiple_values_error(); +#else + multiple_values_error(name); +#endif + } + names_list.append(std::move(name)); + m_temp.push_back(reinterpret_borrow(k.second)); // keep alive + m_args.push_back(k.second.ptr()); + } + } + + [[noreturn]] static void nameless_argument_error() { + throw type_error( + "Got kwargs without a name; only named arguments " + "may be passed via py::arg() to a python function call. " + "(#define PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for details)"); + } + [[noreturn]] static void nameless_argument_error(const std::string &type) { + throw type_error("Got kwargs without a name of type '" + type + + "'; only named " + "arguments may be passed via py::arg() to a python function call. "); + } + [[noreturn]] static void multiple_values_error() { + throw type_error( + "Got multiple values for keyword argument " + "(#define PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for details)"); + } + + [[noreturn]] static void multiple_values_error(const std::string &name) { + throw type_error("Got multiple values for keyword argument '" + name + "'"); + } + +private: + small_vector m_args; + object m_names; // todo: use m_temp for this? + small_vector m_temp; +}; + +/// Collect all arguments, including keywords and unpacking for vectorcall +template +unpacking_vectorcall_collector collect_arguments(Args &&...args) { + // Following argument order rules for generalized unpacking according to PEP 448 + static_assert(constexpr_last() + < constexpr_first() + && constexpr_last() + < constexpr_first(), + "Invalid function call: positional args must precede keywords and ** unpacking; " + "* unpacking must precede ** unpacking"); + return unpacking_vectorcall_collector(std::forward(args)...); +} +#endif template template From 73ee046235fe31c682995af74e04c8b367cc47de Mon Sep 17 00:00:00 2001 From: b-pass Date: Wed, 24 Dec 2025 22:14:58 -0500 Subject: [PATCH 05/35] pre-commit fixes --- include/pybind11/cast.h | 75 ++++++++++------------- include/pybind11/detail/argument_vector.h | 8 +-- 2 files changed, 36 insertions(+), 47 deletions(-) diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index 02f952b1ea..7baef34c86 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -2385,18 +2385,19 @@ class unpacking_vectorcall_collector { public: template explicit unpacking_vectorcall_collector(Ts &&...values) { - + m_args.reserve(sizeof...(values) + 1); - m_args.push_back(nullptr); // dummy first argument so we can use PY_VECTORCALL_ARGUMENTS_OFFSET + m_args.push_back( + nullptr); // dummy first argument so we can use PY_VECTORCALL_ARGUMENTS_OFFSET - // collect_arguments guarantees this can't be constructed with kwargs before the last positional - // so we don't need to worry about Ts... being in anything but normal python order. + // collect_arguments guarantees this can't be constructed with kwargs before the last + // positional so we don't need to worry about Ts... being in anything but normal python + // order. using expander = int[]; if (args_have_kw()) { (void) expander{0, (positional(std::forward(values)), 0)...}; - } - else { - list names_list; + } else { + list names_list; (void) expander{0, (process(names_list, std::forward(values)), 0)...}; @@ -2413,11 +2414,7 @@ class unpacking_vectorcall_collector { nargs -= PyTuple_GET_SIZE(m_names.ptr()); } PyObject *result = PyObject_Vectorcall( - ptr, - m_args.data()+1, - nargs|PY_VECTORCALL_ARGUMENTS_OFFSET, - m_names.ptr() - ); + ptr, m_args.data() + 1, nargs | PY_VECTORCALL_ARGUMENTS_OFFSET, m_names.ptr()); if (!result) { throw error_already_set(); } @@ -2440,8 +2437,7 @@ class unpacking_vectorcall_collector { dict kwargs() const { dict val; - if (m_names) - { + if (m_names) { tuple namestup(m_names); size_t offset = m_args.size() - namestup.size(); for (size_t i = 0; i < namestup.size(); ++i, ++offset) { @@ -2452,18 +2448,18 @@ class unpacking_vectorcall_collector { } private: - // normal argument, possibly needing conversion + // normal argument, possibly needing conversion template void positional(T &&x) { auto o = reinterpret_steal( detail::make_caster::cast(std::forward(x), policy, {})); if (!o) { -#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) - throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size()-1)); -#else - throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size()-1), +# if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) + throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size() - 1)); +# else + throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size() - 1), type_id()); -#endif +# endif } m_args.push_back(o.ptr()); m_temp.push_back(std::move(o)); @@ -2479,40 +2475,38 @@ class unpacking_vectorcall_collector { } } - // normal argument, possibly needing conversion + // normal argument, possibly needing conversion template - void process(list &/*names_list*/, T &&x) { + void process(list & /*names_list*/, T &&x) { positional(std::forward(x)); } // * unpacking - void process(list &/*names_list*/, detail::args_proxy ap) { - positional(ap); - } + void process(list & /*names_list*/, detail::args_proxy ap) { positional(ap); } // named argument void process(list &names_list, arg_v a) { if (!a.name) { -#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) +# if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) nameless_argument_error(); -#else +# else nameless_argument_error(a.type); -#endif +# endif } auto name = str(a.name); if (names_list.contains(name)) { -#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) +# if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) multiple_values_error(); -#else +# else multiple_values_error(a.name); -#endif +# endif } if (!a.value) { -#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) +# if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) throw cast_error_unable_to_convert_call_arg(a.name); -#else +# else throw cast_error_unable_to_convert_call_arg(a.name, a.type); -#endif +# endif } names_list.append(std::move(name)); m_temp.push_back(a.value); // keep alive @@ -2524,14 +2518,14 @@ class unpacking_vectorcall_collector { if (!kp) { return; } - for (auto&& k : reinterpret_borrow(kp)) { + for (auto &&k : reinterpret_borrow(kp)) { auto name = str(k.first); if (names_list.contains(name)) { -#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) +# if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) multiple_values_error(); -#else +# else multiple_values_error(name); -#endif +# endif } names_list.append(std::move(name)); m_temp.push_back(reinterpret_borrow(k.second)); // keep alive @@ -2561,14 +2555,13 @@ class unpacking_vectorcall_collector { } private: - small_vector m_args; + small_vector m_args; object m_names; // todo: use m_temp for this? small_vector m_temp; }; /// Collect all arguments, including keywords and unpacking for vectorcall -template +template unpacking_vectorcall_collector collect_arguments(Args &&...args) { // Following argument order rules for generalized unpacking according to PEP 448 static_assert(constexpr_last() diff --git a/include/pybind11/detail/argument_vector.h b/include/pybind11/detail/argument_vector.h index c170b58483..b35ec060bd 100644 --- a/include/pybind11/detail/argument_vector.h +++ b/include/pybind11/detail/argument_vector.h @@ -86,8 +86,7 @@ union inline_array_or_vector { void destruct() { if (!is_inline()) { hvector.~heap_vector(); - } - else if (!std::is_trivially_destructible::value) { + } else if (!std::is_trivially_destructible::value) { for (size_t i = 0; i < iarray.size; ++i) { iarray.arr[i].~ArrayT(); } @@ -96,9 +95,7 @@ union inline_array_or_vector { inline_array_or_vector() : iarray() {} - ~inline_array_or_vector() { - destruct(); - } + ~inline_array_or_vector() { destruct(); } // Disable copy ctor and assignment. inline_array_or_vector(const inline_array_or_vector &) = delete; @@ -127,7 +124,6 @@ union inline_array_or_vector { } return *this; } - }; template From 4615f8a8230ee944b8597615dd0e6c1571da1bdf Mon Sep 17 00:00:00 2001 From: b-pass Date: Thu, 25 Dec 2025 00:20:20 -0500 Subject: [PATCH 06/35] Checkpoint in moving to METH_FASTCALL --- include/pybind11/detail/argument_vector.h | 10 ++ include/pybind11/pybind11.h | 171 ++++++++++++---------- 2 files changed, 100 insertions(+), 81 deletions(-) diff --git a/include/pybind11/detail/argument_vector.h b/include/pybind11/detail/argument_vector.h index b35ec060bd..532e324623 100644 --- a/include/pybind11/detail/argument_vector.h +++ b/include/pybind11/detail/argument_vector.h @@ -196,6 +196,16 @@ struct small_vector { } } + void sort() { + T *begin; + if (is_inline()) { + begin = &m_repr.iarray.arr[0]; + } else { + begin = m_repr.hvector.vec.data(); + } + std::sort(begin, begin + size()); + } + private: using repr_type = inline_array_or_vector; repr_type m_repr; diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index f93272628d..f5c7b7f92a 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -684,7 +684,7 @@ class cpp_function : public function { rec->def->ml_name = rec->name; rec->def->ml_meth = reinterpret_cast(reinterpret_cast(dispatcher)); - rec->def->ml_flags = METH_VARARGS | METH_KEYWORDS; + rec->def->ml_flags = METH_FASTCALL | METH_KEYWORDS; object py_func_rec = detail::function_record_PyObject_New(); (reinterpret_cast(py_func_rec.ptr()))->cpp_func_rec @@ -847,7 +847,7 @@ class cpp_function : public function { } /// Main dispatch logic for calls to functions bound using pybind11 - static PyObject *dispatcher(PyObject *self, PyObject *args_in, PyObject *kwargs_in) { + static PyObject *dispatcher(PyObject *self, PyObject * const *args_in_arr, Py_ssize_t nargsf, PyObject *kwnames_in) { using namespace detail; const function_record *overloads = function_record_ptr_from_PyObject(self); assert(overloads != nullptr); @@ -857,10 +857,12 @@ class cpp_function : public function { /* Need to know how many arguments + keyword arguments there are to pick the right overload */ - const auto n_args_in = static_cast(PyTuple_GET_SIZE(args_in)); + const size_t n_args_in = PyVectorcall_NARGS(nargsf); - handle parent = n_args_in > 0 ? PyTuple_GET_ITEM(args_in, 0) : nullptr, + handle parent = n_args_in > 0 ? args_in_arr[0] : nullptr, result = PYBIND11_TRY_NEXT_OVERLOAD; + + auto kwnames = reinterpret_borrow(kwnames_in); auto self_value_and_holder = value_and_holder(); if (overloads->is_constructor) { @@ -948,7 +950,7 @@ class cpp_function : public function { self_value_and_holder.type->dealloc(self_value_and_holder); } - call.init_self = PyTuple_GET_ITEM(args_in, 0); + call.init_self = args_in_arr[0]; call.args.emplace_back(reinterpret_cast(&self_value_and_holder)); call.args_convert.push_back(false); ++args_copied; @@ -959,17 +961,29 @@ class cpp_function : public function { for (; args_copied < args_to_copy; ++args_copied) { const argument_record *arg_rec = args_copied < func.args.size() ? &func.args[args_copied] : nullptr; - if (kwargs_in && arg_rec && arg_rec->name - && dict_getitemstring(kwargs_in, arg_rec->name)) { - bad_arg = true; - break; + + /* if the argument is listed in the call site's kwargs, but the argument is also fulfilled positionally, then the call can't match this overload. + for example, the call site is: + foo(0, key=1) + but our overload is + foo(key:int) + then this call can't be for us, because it would be invalid. + */ + if (kwnames && arg_rec && arg_rec->name) + { + pybind11::str arg_name(arg_rec->name); + if (PySequence_Contains(kwnames.ptr(), arg_name.ptr())) { + bad_arg = true; + break; + } } - handle arg(PyTuple_GET_ITEM(args_in, args_copied)); + handle arg(args_in_arr[args_copied]); if (arg_rec && !arg_rec->none && arg.is_none()) { bad_arg = true; break; } + call.args.push_back(arg); call.args_convert.push_back(arg_rec ? arg_rec->convert : true); } @@ -981,20 +995,12 @@ class cpp_function : public function { // to copy the rest into a py::args argument. size_t positional_args_copied = args_copied; - // We'll need to copy this if we steal some kwargs for defaults - dict kwargs = reinterpret_borrow(kwargs_in); - // 1.5. Fill in any missing pos_only args from defaults if they exist if (args_copied < func.nargs_pos_only) { for (; args_copied < func.nargs_pos_only; ++args_copied) { const auto &arg_rec = func.args[args_copied]; - handle value; - if (arg_rec.value) { - value = arg_rec.value; - } - if (value) { - call.args.push_back(value); + call.args.push_back(arg_rec.value); call.args_convert.push_back(arg_rec.convert); } else { break; @@ -1007,46 +1013,46 @@ class cpp_function : public function { } // 2. Check kwargs and, failing that, defaults that may help complete the list + small_vector used_kwargs; + if (kwnames) + used_kwargs.reserve(kwnames.size()); if (args_copied < num_args) { - bool copied_kwargs = false; - for (; args_copied < num_args; ++args_copied) { const auto &arg_rec = func.args[args_copied]; handle value; - if (kwargs_in && arg_rec.name) { - value = dict_getitemstring(kwargs.ptr(), arg_rec.name); + if (kwnames && arg_rec.name) { + auto n = n_args_in; + pybind11::str arg_name(arg_rec.name); + for (auto name : kwnames) { + if (PyUnicode_Compare(name.ptr(), arg_name.ptr()) == 0) { + value = args_in_arr[n]; + used_kwargs.emplace_back(n); + break; + } + ++n; + } } - if (value) { - // Consume a kwargs value - if (!copied_kwargs) { - kwargs = reinterpret_steal(PyDict_Copy(kwargs.ptr())); - copied_kwargs = true; - } - if (PyDict_DelItemString(kwargs.ptr(), arg_rec.name) == -1) { - throw error_already_set(); - } - } else if (arg_rec.value) { + if (!value) { value = arg_rec.value; + if (!value) { + break; + } } if (!arg_rec.none && value.is_none()) { break; } - if (value) { - // If we're at the py::args index then first insert a stub for it to be - // replaced later - if (func.has_args && call.args.size() == func.nargs_pos) { - call.args.push_back(none()); - } - - call.args.push_back(value); - call.args_convert.push_back(arg_rec.convert); - } else { - break; + // If we're at the py::args index then first insert a stub for it to be + // replaced later + if (func.has_args && call.args.size() == func.nargs_pos) { + call.args.push_back(none()); } + + call.args.push_back(value); + call.args_convert.push_back(arg_rec.convert); } if (args_copied < num_args) { @@ -1056,39 +1062,43 @@ class cpp_function : public function { } // 3. Check everything was consumed (unless we have a kwargs arg) - if (kwargs && !kwargs.empty() && !func.has_kwargs) { + if (!func.has_kwargs && kwnames && kwnames.size() > used_kwargs.size()) { continue; // Unconsumed kwargs, but no py::kwargs argument to accept them } // 4a. If we have a py::args argument, create a new tuple with leftovers if (func.has_args) { - tuple extra_args; - if (args_to_copy == 0) { - // We didn't copy out any position arguments from the args_in tuple, so we - // can reuse it directly without copying: - extra_args = reinterpret_borrow(args_in); - } else if (positional_args_copied >= n_args_in) { - extra_args = tuple(0); + if (positional_args_copied >= n_args_in) { + call.args_ref = tuple(0); } else { size_t args_size = n_args_in - positional_args_copied; - extra_args = tuple(args_size); + tuple extra_args(args_size); for (size_t i = 0; i < args_size; ++i) { - extra_args[i] = PyTuple_GET_ITEM(args_in, positional_args_copied + i); + extra_args[i] = reinterpret_borrow(args_in_arr[positional_args_copied + i]); } + call.args_ref = std::move(extra_args); } if (call.args.size() <= func.nargs_pos) { - call.args.push_back(extra_args); + call.args.push_back(call.args_ref); } else { - call.args[func.nargs_pos] = extra_args; + call.args[func.nargs_pos] = call.args_ref; } call.args_convert.push_back(false); - call.args_ref = std::move(extra_args); } // 4b. If we have a py::kwargs, pass on any remaining kwargs if (func.has_kwargs) { - if (!kwargs.ptr()) { - kwargs = dict(); // If we didn't get one, send an empty one + dict kwargs; + size_t used_i = 0; + size_t name_count = kwnames ? kwnames.size() : 0; + used_kwargs.sort(); + for (size_t i = 0; i < name_count; ++i) { + if (used_i < used_kwargs.size() && used_kwargs[used_i] == i) { + ++used_i; + } + else { + kwargs[kwnames[i]] = reinterpret_borrow(args_in_arr[n_args_in + i]); + } } call.args.push_back(kwargs); call.args_convert.push_back(false); @@ -1227,41 +1237,40 @@ class cpp_function : public function { msg += '\n'; } msg += "\nInvoked with: "; - auto args_ = reinterpret_borrow(args_in); bool some_args = false; - for (size_t ti = overloads->is_constructor ? 1 : 0; ti < args_.size(); ++ti) { + size_t ti = overloads->is_constructor ? 1 : 0; + for (; ti < n_args_in; ++ti) { if (!some_args) { some_args = true; } else { msg += ", "; } try { - msg += pybind11::repr(args_[ti]); + msg += pybind11::repr(args_in_arr[ti]); } catch (const error_already_set &) { msg += ""; } } - if (kwargs_in) { - auto kwargs = reinterpret_borrow(kwargs_in); - if (!kwargs.empty()) { - if (some_args) { - msg += "; "; + if (kwnames && !kwnames.empty()) { + if (some_args) { + msg += "; "; + } + msg += "kwargs: "; + bool first = true; + for (const auto &kwarg : kwnames) { + if (first) { + first = false; + } else { + msg += ", "; } - msg += "kwargs: "; - bool first = true; - for (const auto &kwarg : kwargs) { - if (first) { - first = false; - } else { - msg += ", "; - } - msg += pybind11::str("{}=").format(kwarg.first); - try { - msg += pybind11::repr(kwarg.second); - } catch (const error_already_set &) { - msg += ""; - } + msg += pybind11::str(kwarg); + msg += '='; + try { + msg += pybind11::repr(args_in_arr[ti]); + } catch (const error_already_set &) { + msg += ""; } + ++ti; } } From 1dc7b3fb1ae18b433e8dd969c0772e52b9039644 Mon Sep 17 00:00:00 2001 From: b-pass Date: Thu, 25 Dec 2025 00:20:55 -0500 Subject: [PATCH 07/35] pre-commit fixes --- include/pybind11/pybind11.h | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index f5c7b7f92a..25f50de7e0 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -847,7 +847,10 @@ class cpp_function : public function { } /// Main dispatch logic for calls to functions bound using pybind11 - static PyObject *dispatcher(PyObject *self, PyObject * const *args_in_arr, Py_ssize_t nargsf, PyObject *kwnames_in) { + static PyObject *dispatcher(PyObject *self, + PyObject *const *args_in_arr, + Py_ssize_t nargsf, + PyObject *kwnames_in) { using namespace detail; const function_record *overloads = function_record_ptr_from_PyObject(self); assert(overloads != nullptr); @@ -861,7 +864,7 @@ class cpp_function : public function { handle parent = n_args_in > 0 ? args_in_arr[0] : nullptr, result = PYBIND11_TRY_NEXT_OVERLOAD; - + auto kwnames = reinterpret_borrow(kwnames_in); auto self_value_and_holder = value_and_holder(); @@ -961,16 +964,13 @@ class cpp_function : public function { for (; args_copied < args_to_copy; ++args_copied) { const argument_record *arg_rec = args_copied < func.args.size() ? &func.args[args_copied] : nullptr; - - /* if the argument is listed in the call site's kwargs, but the argument is also fulfilled positionally, then the call can't match this overload. - for example, the call site is: - foo(0, key=1) - but our overload is - foo(key:int) - then this call can't be for us, because it would be invalid. + + /* if the argument is listed in the call site's kwargs, but the argument is + also fulfilled positionally, then the call can't match this overload. for + example, the call site is: foo(0, key=1) but our overload is foo(key:int) then + this call can't be for us, because it would be invalid. */ - if (kwnames && arg_rec && arg_rec->name) - { + if (kwnames && arg_rec && arg_rec->name) { pybind11::str arg_name(arg_rec->name); if (PySequence_Contains(kwnames.ptr(), arg_name.ptr())) { bad_arg = true; @@ -1074,7 +1074,8 @@ class cpp_function : public function { size_t args_size = n_args_in - positional_args_copied; tuple extra_args(args_size); for (size_t i = 0; i < args_size; ++i) { - extra_args[i] = reinterpret_borrow(args_in_arr[positional_args_copied + i]); + extra_args[i] = reinterpret_borrow( + args_in_arr[positional_args_copied + i]); } call.args_ref = std::move(extra_args); } @@ -1095,9 +1096,9 @@ class cpp_function : public function { for (size_t i = 0; i < name_count; ++i) { if (used_i < used_kwargs.size() && used_kwargs[used_i] == i) { ++used_i; - } - else { - kwargs[kwnames[i]] = reinterpret_borrow(args_in_arr[n_args_in + i]); + } else { + kwargs[kwnames[i]] + = reinterpret_borrow(args_in_arr[n_args_in + i]); } } call.args.push_back(kwargs); From 893df5d85879483907ac5b9abdb7c9e43d319eab Mon Sep 17 00:00:00 2001 From: b-pass Date: Thu, 25 Dec 2025 21:19:09 -0500 Subject: [PATCH 08/35] Use the names tuple directly, cleaner code and less reference counting --- include/pybind11/pybind11.h | 82 +++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 35 deletions(-) diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index 25f50de7e0..23f1c59942 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -865,8 +865,6 @@ class cpp_function : public function { handle parent = n_args_in > 0 ? args_in_arr[0] : nullptr, result = PYBIND11_TRY_NEXT_OVERLOAD; - auto kwnames = reinterpret_borrow(kwnames_in); - auto self_value_and_holder = value_and_holder(); if (overloads->is_constructor) { if (!parent @@ -970,12 +968,10 @@ class cpp_function : public function { example, the call site is: foo(0, key=1) but our overload is foo(key:int) then this call can't be for us, because it would be invalid. */ - if (kwnames && arg_rec && arg_rec->name) { - pybind11::str arg_name(arg_rec->name); - if (PySequence_Contains(kwnames.ptr(), arg_name.ptr())) { - bad_arg = true; - break; - } + if (kwnames_in && arg_rec && arg_rec->name + && keyword_index(kwnames_in, arg_rec->name) >= 0) { + bad_arg = true; + break; } handle arg(args_in_arr[args_copied]); @@ -1014,23 +1010,19 @@ class cpp_function : public function { // 2. Check kwargs and, failing that, defaults that may help complete the list small_vector used_kwargs; - if (kwnames) - used_kwargs.reserve(kwnames.size()); + if (kwnames_in) { + used_kwargs.reserve(PyTuple_GET_SIZE(kwnames_in)); + } if (args_copied < num_args) { for (; args_copied < num_args; ++args_copied) { const auto &arg_rec = func.args[args_copied]; handle value; - if (kwnames && arg_rec.name) { - auto n = n_args_in; - pybind11::str arg_name(arg_rec.name); - for (auto name : kwnames) { - if (PyUnicode_Compare(name.ptr(), arg_name.ptr()) == 0) { - value = args_in_arr[n]; - used_kwargs.emplace_back(n); - break; - } - ++n; + if (kwnames_in && arg_rec.name) { + ssize_t i = keyword_index(kwnames_in, arg_rec.name); + if (i >= 0) { + value = args_in_arr[n_args_in + i]; + used_kwargs.emplace_back(i); } } @@ -1062,7 +1054,8 @@ class cpp_function : public function { } // 3. Check everything was consumed (unless we have a kwargs arg) - if (!func.has_kwargs && kwnames && kwnames.size() > used_kwargs.size()) { + if (!func.has_kwargs && kwnames_in + && PyTuple_GET_SIZE(kwnames_in) > static_cast(used_kwargs.size())) { continue; // Unconsumed kwargs, but no py::kwargs argument to accept them } @@ -1074,8 +1067,7 @@ class cpp_function : public function { size_t args_size = n_args_in - positional_args_copied; tuple extra_args(args_size); for (size_t i = 0; i < args_size; ++i) { - extra_args[i] = reinterpret_borrow( - args_in_arr[positional_args_copied + i]); + extra_args[i] = args_in_arr[positional_args_copied + i]; } call.args_ref = std::move(extra_args); } @@ -1091,14 +1083,13 @@ class cpp_function : public function { if (func.has_kwargs) { dict kwargs; size_t used_i = 0; - size_t name_count = kwnames ? kwnames.size() : 0; + size_t name_count = kwnames_in ? PyTuple_GET_SIZE(kwnames_in) : 0; used_kwargs.sort(); for (size_t i = 0; i < name_count; ++i) { if (used_i < used_kwargs.size() && used_kwargs[used_i] == i) { ++used_i; } else { - kwargs[kwnames[i]] - = reinterpret_borrow(args_in_arr[n_args_in + i]); + kwargs[PyTuple_GET_ITEM(kwnames_in, i)] = args_in_arr[n_args_in + i]; } } call.args.push_back(kwargs); @@ -1106,8 +1097,9 @@ class cpp_function : public function { call.kwargs_ref = std::move(kwargs); } -// 5. Put everything in a vector. Not technically step 5, we've been building it -// in `call.args` all along. + // 5. Put everything in a vector. Not technically step 5, we've been building it + // in `call.args` all along. + #if defined(PYBIND11_DETAILED_ERROR_MESSAGES) if (call.args.size() != func.nargs || call.args_convert.size() != func.nargs) { pybind11_fail("Internal error: function call dispatcher inserted wrong number " @@ -1239,8 +1231,7 @@ class cpp_function : public function { } msg += "\nInvoked with: "; bool some_args = false; - size_t ti = overloads->is_constructor ? 1 : 0; - for (; ti < n_args_in; ++ti) { + for (size_t ti = overloads->is_constructor ? 1 : 0; ti < n_args_in; ++ti) { if (!some_args) { some_args = true; } else { @@ -1252,26 +1243,25 @@ class cpp_function : public function { msg += ""; } } - if (kwnames && !kwnames.empty()) { + if (kwnames_in && PyTuple_GET_SIZE(kwnames_in) > 0) { if (some_args) { msg += "; "; } msg += "kwargs: "; bool first = true; - for (const auto &kwarg : kwnames) { + for (ssize_t i = 0; i < PyTuple_GET_SIZE(kwnames_in); ++i) { if (first) { first = false; } else { msg += ", "; } - msg += pybind11::str(kwarg); + msg += reinterpret_borrow(PyTuple_GET_ITEM(kwnames_in, i)); msg += '='; try { - msg += pybind11::repr(args_in_arr[ti]); + msg += pybind11::repr(args_in_arr[n_args_in + i]); } catch (const error_already_set &) { msg += ""; } - ++ti; } } @@ -1305,6 +1295,28 @@ class cpp_function : public function { } return result.ptr(); } + + static ssize_t keyword_index(PyObject *haystack, char const *needle) { + /* kwargs is usually very small (<= 5 entries). The arg strings are typically interned. + * CPython itself implements the search this way, first comparing all pointers ... which is + * cheap and will work if the strings are interned. If it fails, then it falls back to a + * second lexicographic check. This is wildly expensive for huge argument lists, but those + * are incredably rare so we optimize for the vastly common case of just a couple of args. + */ + auto n = PyTuple_GET_SIZE(haystack); + auto s = reinterpret_steal(PyUnicode_InternFromString(needle)); + for (ssize_t i = 0; i < n; ++i) { + if (PyTuple_GET_ITEM(haystack, i) == s.ptr()) { + return i; + } + } + for (ssize_t i = 0; i < n; ++i) { + if (PyUnicode_Compare(PyTuple_GET_ITEM(haystack, i), s.ptr()) == 0) { + return i; + } + } + return -1; + } }; PYBIND11_NAMESPACE_BEGIN(detail) From 655db8a7900dd848cbd0b17ad6d3ccd5891e3aab Mon Sep 17 00:00:00 2001 From: b-pass Date: Thu, 25 Dec 2025 21:22:47 -0500 Subject: [PATCH 09/35] Fix unit test, the code now holds more references It cannot re-use the incoming tuple as before, because it is no longer a tuple at all. So a new tuple must be created, which then holds references for each member. --- include/pybind11/pybind11.h | 2 +- tests/test_kwargs_and_defaults.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index 23f1c59942..918b7518d8 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -1301,7 +1301,7 @@ class cpp_function : public function { * CPython itself implements the search this way, first comparing all pointers ... which is * cheap and will work if the strings are interned. If it fails, then it falls back to a * second lexicographic check. This is wildly expensive for huge argument lists, but those - * are incredably rare so we optimize for the vastly common case of just a couple of args. + * are incredibly rare so we optimize for the vastly common case of just a couple of args. */ auto n = PyTuple_GET_SIZE(haystack); auto s = reinterpret_steal(PyUnicode_InternFromString(needle)); diff --git a/tests/test_kwargs_and_defaults.py b/tests/test_kwargs_and_defaults.py index d41e505583..a7745d1ecb 100644 --- a/tests/test_kwargs_and_defaults.py +++ b/tests/test_kwargs_and_defaults.py @@ -458,7 +458,8 @@ def test_args_refcount(): assert refcount(myval) == expected exp3 = refcount(myval, myval, myval) - assert m.args_refcount(myval, myval, myval) == (exp3, exp3, exp3) + # if we have to create a new tuple internally, then it will hold an extra reference for each item in it. + assert m.args_refcount(myval, myval, myval) == (exp3 + 3, exp3 + 3, exp3 + 3) assert refcount(myval) == expected # This function takes the first arg as a `py::object` and the rest as a `py::args`. Unlike the From c43b156b36a8bb7262f29d001c7ade4a6b4c626f Mon Sep 17 00:00:00 2001 From: b-pass Date: Thu, 25 Dec 2025 21:39:16 -0500 Subject: [PATCH 10/35] Make clangtidy happy --- include/pybind11/detail/argument_vector.h | 2 +- include/pybind11/pybind11.h | 19 +++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/include/pybind11/detail/argument_vector.h b/include/pybind11/detail/argument_vector.h index 532e324623..61f1352897 100644 --- a/include/pybind11/detail/argument_vector.h +++ b/include/pybind11/detail/argument_vector.h @@ -197,7 +197,7 @@ struct small_vector { } void sort() { - T *begin; + T *begin = nullptr; if (is_inline()) { begin = &m_repr.iarray.arr[0]; } else { diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index 918b7518d8..cea8d3d7b5 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -847,10 +847,8 @@ class cpp_function : public function { } /// Main dispatch logic for calls to functions bound using pybind11 - static PyObject *dispatcher(PyObject *self, - PyObject *const *args_in_arr, - Py_ssize_t nargsf, - PyObject *kwnames_in) { + static PyObject * + dispatcher(PyObject *self, PyObject *const *args_in_arr, size_t nargsf, PyObject *kwnames_in) { using namespace detail; const function_record *overloads = function_record_ptr_from_PyObject(self); assert(overloads != nullptr); @@ -860,7 +858,7 @@ class cpp_function : public function { /* Need to know how many arguments + keyword arguments there are to pick the right overload */ - const size_t n_args_in = PyVectorcall_NARGS(nargsf); + const auto n_args_in = static_cast(PyVectorcall_NARGS(nargsf)); handle parent = n_args_in > 0 ? args_in_arr[0] : nullptr, result = PYBIND11_TRY_NEXT_OVERLOAD; @@ -1011,7 +1009,7 @@ class cpp_function : public function { // 2. Check kwargs and, failing that, defaults that may help complete the list small_vector used_kwargs; if (kwnames_in) { - used_kwargs.reserve(PyTuple_GET_SIZE(kwnames_in)); + used_kwargs.reserve(static_cast(PyTuple_GET_SIZE(kwnames_in))); } if (args_copied < num_args) { for (; args_copied < num_args; ++args_copied) { @@ -1021,8 +1019,8 @@ class cpp_function : public function { if (kwnames_in && arg_rec.name) { ssize_t i = keyword_index(kwnames_in, arg_rec.name); if (i >= 0) { - value = args_in_arr[n_args_in + i]; - used_kwargs.emplace_back(i); + value = args_in_arr[n_args_in + static_cast(i)]; + used_kwargs.emplace_back(static_cast(i)); } } @@ -1083,7 +1081,8 @@ class cpp_function : public function { if (func.has_kwargs) { dict kwargs; size_t used_i = 0; - size_t name_count = kwnames_in ? PyTuple_GET_SIZE(kwnames_in) : 0; + size_t name_count + = kwnames_in ? static_cast(PyTuple_GET_SIZE(kwnames_in)) : 0; used_kwargs.sort(); for (size_t i = 0; i < name_count; ++i) { if (used_i < used_kwargs.size() && used_kwargs[used_i] == i) { @@ -1249,7 +1248,7 @@ class cpp_function : public function { } msg += "kwargs: "; bool first = true; - for (ssize_t i = 0; i < PyTuple_GET_SIZE(kwnames_in); ++i) { + for (size_t i = 0; i < static_cast(PyTuple_GET_SIZE(kwnames_in)); ++i) { if (first) { first = false; } else { From d881a7ba2f87a70ba6f0a136e011fe45b5ad0575 Mon Sep 17 00:00:00 2001 From: b-pass Date: Fri, 26 Dec 2025 08:51:50 -0500 Subject: [PATCH 11/35] Oops, _v is C++14 --- include/pybind11/detail/argument_vector.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/pybind11/detail/argument_vector.h b/include/pybind11/detail/argument_vector.h index 61f1352897..b9131f0952 100644 --- a/include/pybind11/detail/argument_vector.h +++ b/include/pybind11/detail/argument_vector.h @@ -63,7 +63,7 @@ union inline_array_or_vector { heap_vector(std::size_t count, VectorT value) : vec(count, value) {} }; - static_assert(std::is_nothrow_move_assignable_v, "this class is not exception safe"); + static_assert(std::is_nothrow_move_assignable::value, "this class is not exception safe"); inline_array iarray; heap_vector hvector; @@ -216,7 +216,7 @@ struct small_vector { using heap_vector = typename repr_type::heap_vector; heap_vector hv; hv.vec.reserve(reserved_size); - static_assert(std::is_nothrow_move_assignable_v, "this class is not exception safe"); + static_assert(std::is_nothrow_move_assignable::value, "this class is not exception safe"); std::move(ha.arr.begin(), ha.arr.begin() + ha.size, std::back_inserter(hv.vec)); new (&m_repr.hvector) heap_vector(std::move(hv)); } From 76831eccc0149f000d3dfa7c14a9ecbee9a174d0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 26 Dec 2025 13:52:25 +0000 Subject: [PATCH 12/35] style: pre-commit fixes --- include/pybind11/detail/argument_vector.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/pybind11/detail/argument_vector.h b/include/pybind11/detail/argument_vector.h index b9131f0952..f3a8753c0e 100644 --- a/include/pybind11/detail/argument_vector.h +++ b/include/pybind11/detail/argument_vector.h @@ -63,7 +63,8 @@ union inline_array_or_vector { heap_vector(std::size_t count, VectorT value) : vec(count, value) {} }; - static_assert(std::is_nothrow_move_assignable::value, "this class is not exception safe"); + static_assert(std::is_nothrow_move_assignable::value, + "this class is not exception safe"); inline_array iarray; heap_vector hvector; @@ -216,7 +217,8 @@ struct small_vector { using heap_vector = typename repr_type::heap_vector; heap_vector hv; hv.vec.reserve(reserved_size); - static_assert(std::is_nothrow_move_assignable::value, "this class is not exception safe"); + static_assert(std::is_nothrow_move_assignable::value, + "this class is not exception safe"); std::move(ha.arr.begin(), ha.arr.begin() + ha.size, std::back_inserter(hv.vec)); new (&m_repr.hvector) heap_vector(std::move(hv)); } From 0285818b4e3629edbc5506d7703b596f0cea1f63 Mon Sep 17 00:00:00 2001 From: b-pass Date: Fri, 26 Dec 2025 09:04:41 -0500 Subject: [PATCH 13/35] Minor code cleanup --- include/pybind11/detail/argument_vector.h | 29 +++++++++++------------ 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/include/pybind11/detail/argument_vector.h b/include/pybind11/detail/argument_vector.h index f3a8753c0e..995746109c 100644 --- a/include/pybind11/detail/argument_vector.h +++ b/include/pybind11/detail/argument_vector.h @@ -63,9 +63,6 @@ union inline_array_or_vector { heap_vector(std::size_t count, VectorT value) : vec(count, value) {} }; - static_assert(std::is_nothrow_move_assignable::value, - "this class is not exception safe"); - inline_array iarray; heap_vector hvector; @@ -84,20 +81,16 @@ union inline_array_or_vector { return result; } - void destruct() { - if (!is_inline()) { + inline_array_or_vector() : iarray() {} + + ~inline_array_or_vector() { + if (is_inline()) { + iarray.~inline_array(); + } else { hvector.~heap_vector(); - } else if (!std::is_trivially_destructible::value) { - for (size_t i = 0; i < iarray.size; ++i) { - iarray.arr[i].~ArrayT(); - } } } - inline_array_or_vector() : iarray() {} - - ~inline_array_or_vector() { destruct(); } - // Disable copy ctor and assignment. inline_array_or_vector(const inline_array_or_vector &) = delete; inline_array_or_vector &operator=(const inline_array_or_vector &) = delete; @@ -116,7 +109,11 @@ union inline_array_or_vector { return *this; } - destruct(); + if (is_inline()) { + iarray.~inline_array(); + } else { + hvector.~heap_vector(); + } if (rhs.is_inline()) { new (&iarray) inline_array(std::move(rhs.iarray)); @@ -218,7 +215,9 @@ struct small_vector { heap_vector hv; hv.vec.reserve(reserved_size); static_assert(std::is_nothrow_move_assignable::value, - "this class is not exception safe"); + "this conversion is not exception safe"); + static_assert(std::is_nothrow_move_constructible::value, + "this conversion is not exception safe"); std::move(ha.arr.begin(), ha.arr.begin() + ha.size, std::back_inserter(hv.vec)); new (&m_repr.hvector) heap_vector(std::move(hv)); } From cc2b5a9317d49cd5bda27f169193a6193de31104 Mon Sep 17 00:00:00 2001 From: b-pass Date: Fri, 26 Dec 2025 09:28:18 -0500 Subject: [PATCH 14/35] Fix signed conversions --- include/pybind11/cast.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index 7baef34c86..9615f12ab7 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -2411,7 +2411,7 @@ class unpacking_vectorcall_collector { // -1 to account for PY_VECTORCALL_ARGUMENTS_OFFSET size_t nargs = m_args.size() - 1; if (m_names) { - nargs -= PyTuple_GET_SIZE(m_names.ptr()); + nargs -= static_cast(PyTuple_GET_SIZE(m_names.ptr())); } PyObject *result = PyObject_Vectorcall( ptr, m_args.data() + 1, nargs | PY_VECTORCALL_ARGUMENTS_OFFSET, m_names.ptr()); @@ -2425,7 +2425,7 @@ class unpacking_vectorcall_collector { // -1 to account for PY_VECTORCALL_ARGUMENTS_OFFSET size_t nargs = m_args.size() - 1; if (m_names) { - nargs -= PyTuple_GET_SIZE(m_names.ptr()); + nargs -= static_cast(PyTuple_GET_SIZE(m_names.ptr())); } tuple val(nargs); for (size_t i = 0; i < nargs; ++i) { From 25485b370c3b75216bff12506278a48b5eea8a09 Mon Sep 17 00:00:00 2001 From: b-pass Date: Fri, 26 Dec 2025 17:59:14 -0500 Subject: [PATCH 15/35] Fix args expansion This would be easier with `if constexpr` --- include/pybind11/cast.h | 61 ++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 35 deletions(-) diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index 9615f12ab7..c2fa9650d0 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -2346,15 +2346,6 @@ constexpr bool args_are_all_positional() { return all_of...>::value; } -// [workaround(intel)] Separate function required here -// We need to put this into a separate function because the Intel compiler -// fails to compile enable_if_t...>::value> -// (tested with ICC 2021.1 Beta 20200827). -template -constexpr bool args_have_kw() { - return none_of...>::value; -} - #if PY_VERSION_HEX < 0x030C0000 /// Collect only positional arguments for a Python function call template explicit unpacking_vectorcall_collector(Ts &&...values) { - m_args.reserve(sizeof...(values) + 1); m_args.push_back( nullptr); // dummy first argument so we can use PY_VECTORCALL_ARGUMENTS_OFFSET + object names_list; // null or a list of names + // collect_arguments guarantees this can't be constructed with kwargs before the last // positional so we don't need to worry about Ts... being in anything but normal python // order. using expander = int[]; - if (args_have_kw()) { - (void) expander{0, (positional(std::forward(values)), 0)...}; - } else { - list names_list; - - (void) expander{0, (process(names_list, std::forward(values)), 0)...}; + (void) expander{0, (process(names_list, std::forward(values)), 0)...}; - if (!names_list.empty()) - m_names = reinterpret_steal(PyList_AsTuple(names_list.ptr())); + if (names_list) { + m_names = reinterpret_steal(PyList_AsTuple(names_list.ptr())); } } @@ -2438,7 +2425,7 @@ class unpacking_vectorcall_collector { dict kwargs() const { dict val; if (m_names) { - tuple namestup(m_names); + auto namestup = reinterpret_borrow(m_names); size_t offset = m_args.size() - namestup.size(); for (size_t i = 0; i < namestup.size(); ++i, ++offset) { val[namestup[i]] = reinterpret_borrow(m_args[offset]); @@ -2450,7 +2437,7 @@ class unpacking_vectorcall_collector { private: // normal argument, possibly needing conversion template - void positional(T &&x) { + void process(object & /*names_list*/, T &&x) { auto o = reinterpret_steal( detail::make_caster::cast(std::forward(x), policy, {})); if (!o) { @@ -2465,7 +2452,8 @@ class unpacking_vectorcall_collector { m_temp.push_back(std::move(o)); } - void positional(detail::args_proxy ap) { + // * unpacking + void process(object & /*names_list*/, detail::args_proxy ap) { if (!ap) { return; } @@ -2475,17 +2463,11 @@ class unpacking_vectorcall_collector { } } - // normal argument, possibly needing conversion - template - void process(list & /*names_list*/, T &&x) { - positional(std::forward(x)); - } - - // * unpacking - void process(list & /*names_list*/, detail::args_proxy ap) { positional(ap); } - // named argument - void process(list &names_list, arg_v a) { + void process(object &names_list, arg_v a) { + if (!names_list) { + names_list = list(); + } if (!a.name) { # if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) nameless_argument_error(); @@ -2508,16 +2490,22 @@ class unpacking_vectorcall_collector { throw cast_error_unable_to_convert_call_arg(a.name, a.type); # endif } - names_list.append(std::move(name)); + if (PyList_Append(names_list.ptr(), name.release().ptr()) < 0) + { + throw error_already_set(); + } m_temp.push_back(a.value); // keep alive m_args.push_back(a.value.ptr()); } // ** unpacking - void process(list &names_list, detail::kwargs_proxy kp) { + void process(object &names_list, detail::kwargs_proxy kp) { if (!kp) { return; } + if (!names_list) { + names_list = list(); + } for (auto &&k : reinterpret_borrow(kp)) { auto name = str(k.first); if (names_list.contains(name)) { @@ -2527,7 +2515,10 @@ class unpacking_vectorcall_collector { multiple_values_error(name); # endif } - names_list.append(std::move(name)); + if (PyList_Append(names_list.ptr(), name.release().ptr()) < 0) + { + throw error_already_set(); + } m_temp.push_back(reinterpret_borrow(k.second)); // keep alive m_args.push_back(k.second.ptr()); } @@ -2556,7 +2547,7 @@ class unpacking_vectorcall_collector { private: small_vector m_args; - object m_names; // todo: use m_temp for this? + object m_names; // null or a tuple of names small_vector m_temp; }; From 6c6bd71ea81773f2df39690400a8c1f1c4b269d4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 26 Dec 2025 23:00:07 +0000 Subject: [PATCH 16/35] style: pre-commit fixes --- include/pybind11/cast.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index c2fa9650d0..95d20d7587 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -2490,8 +2490,7 @@ class unpacking_vectorcall_collector { throw cast_error_unable_to_convert_call_arg(a.name, a.type); # endif } - if (PyList_Append(names_list.ptr(), name.release().ptr()) < 0) - { + if (PyList_Append(names_list.ptr(), name.release().ptr()) < 0) { throw error_already_set(); } m_temp.push_back(a.value); // keep alive @@ -2515,8 +2514,7 @@ class unpacking_vectorcall_collector { multiple_values_error(name); # endif } - if (PyList_Append(names_list.ptr(), name.release().ptr()) < 0) - { + if (PyList_Append(names_list.ptr(), name.release().ptr()) < 0) { throw error_already_set(); } m_temp.push_back(reinterpret_borrow(k.second)); // keep alive From afd62995592dc05bef4eb06a5eaa921208904f5c Mon Sep 17 00:00:00 2001 From: b-pass Date: Fri, 26 Dec 2025 18:04:51 -0500 Subject: [PATCH 17/35] Code cleanup --- include/pybind11/detail/argument_vector.h | 45 ++++++++++++----------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/include/pybind11/detail/argument_vector.h b/include/pybind11/detail/argument_vector.h index 995746109c..9afbabe013 100644 --- a/include/pybind11/detail/argument_vector.h +++ b/include/pybind11/detail/argument_vector.h @@ -66,21 +66,6 @@ union inline_array_or_vector { inline_array iarray; heap_vector hvector; - bool is_inline() const { - // It is undefined behavior to access the inactive member of a - // union directly. However, it is well-defined to reinterpret_cast any - // pointer into a pointer to char and examine it as an array - // of bytes. See - // https://dev-discuss.pytorch.org/t/unionizing-for-profit-how-to-exploit-the-power-of-unions-in-c/444#the-memcpy-loophole-4 - bool result = false; - static_assert(offsetof(inline_array, is_inline) == 0, - "untagged union implementation relies on this"); - static_assert(offsetof(heap_vector, is_inline) == 0, - "untagged union implementation relies on this"); - std::memcpy(&result, reinterpret_cast(this), sizeof(bool)); - return result; - } - inline_array_or_vector() : iarray() {} ~inline_array_or_vector() { @@ -122,6 +107,21 @@ union inline_array_or_vector { } return *this; } + + bool is_inline() const { + // It is undefined behavior to access the inactive member of a + // union directly. However, it is well-defined to reinterpret_cast any + // pointer into a pointer to char and examine it as an array + // of bytes. See + // https://dev-discuss.pytorch.org/t/unionizing-for-profit-how-to-exploit-the-power-of-unions-in-c/444#the-memcpy-loophole-4 + bool result = false; + static_assert(offsetof(inline_array, is_inline) == 0, + "untagged union implementation relies on this"); + static_assert(offsetof(heap_vector, is_inline) == 0, + "untagged union implementation relies on this"); + std::memcpy(&result, reinterpret_cast(this), sizeof(bool)); + return result; + } }; template @@ -144,7 +144,7 @@ struct small_vector { T const *data() const { if (is_inline()) { - return &m_repr.iarray.arr[0]; + return m_repr.iarray.arr.data(); } return m_repr.hvector.vec.data(); } @@ -157,7 +157,7 @@ struct small_vector { return m_repr.hvector.vec[idx]; } - T operator[](std::size_t idx) const { + T const &operator[](std::size_t idx) const { assert(idx < size()); if (is_inline()) { return m_repr.iarray.arr[idx]; @@ -195,13 +195,16 @@ struct small_vector { } void sort() { - T *begin = nullptr; if (is_inline()) { - begin = &m_repr.iarray.arr[0]; + if (m_repr.iarray.size < m_repr.iarray.arr.size()) { + std::sort(m_repr.iarray.arr.begin(), + m_repr.iarray.arr.begin() + m_repr.iarray.size); + } else { + std::sort(m_repr.iarray.arr.begin(), m_repr.iarray.arr.end()); + } } else { - begin = m_repr.hvector.vec.data(); + std::sort(m_repr.hvector.vec.begin(), m_repr.hvector.vec.end()); } - std::sort(begin, begin + size()); } private: From 6ed6d5a83ddb0bead44016d2f306ae4ab8ec8990 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 26 Dec 2025 20:31:37 -0800 Subject: [PATCH 18/35] fix(tests): Install multiple-interpreter test modules into wheel The `mod_per_interpreter_gil`, `mod_shared_interpreter_gil`, and `mod_per_interpreter_gil_with_singleton` modules were being built but not installed into the wheel when using scikit-build-core (SKBUILD=true). This caused iOS (and potentially Android) CIBW tests to fail with ModuleNotFoundError. Root cause analysis: - The main test targets have install() commands (line 531) - The PYBIND11_MULTIPLE_INTERPRETERS_TEST_MODULES were missing equivalent install() commands - For regular CMake builds, this wasn't a problem because LIBRARY_OUTPUT_DIRECTORY places the modules next to pybind11_tests - For wheel builds, only targets with explicit install() commands are included in the wheel This issue was latent until commit fee2527d changed the test imports from `pytest.importorskip()` (graceful skip) to direct `import` statements (hard failure), which exposed the missing modules. Failing tests: - test_multiple_interpreters.py::test_independent_subinterpreters - test_multiple_interpreters.py::test_dependent_subinterpreters Error: ModuleNotFoundError: No module named 'mod_per_interpreter_gil' --- tests/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 15e18705be..fdf33628aa 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -602,6 +602,12 @@ if(NOT PYBIND11_CUDA_TESTS) endif() endforeach() + if(SKBUILD) + foreach(mod IN LISTS PYBIND11_MULTIPLE_INTERPRETERS_TEST_MODULES) + install(TARGETS "${mod}" LIBRARY DESTINATION .) + endforeach() + endif() + if(PYBIND11_TEST_SMART_HOLDER) foreach(mod IN LISTS PYBIND11_MULTIPLE_INTERPRETERS_TEST_MODULES) target_compile_definitions( From ca72d6c1a23d1fd069c5374cf5944f0932f26bc9 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 26 Dec 2025 20:58:47 -0800 Subject: [PATCH 19/35] tests: Pin numpy 2.4.0 for Python 3.14 CI tests Add numpy==2.4.0 requirement for Python 3.14 (both default and free-threaded builds). NumPy 2.4.0 is the first version to provide official PyPI wheels for Python 3.14: - numpy-2.4.0-cp314-cp314-manylinux_2_27_x86_64...whl (default) - numpy-2.4.0-cp314-cp314t-manylinux_2_27_x86_64...whl (free-threaded) Previously, CI was skipping all numpy-dependent tests for Python 3.14 because PIP_ONLY_BINARY was set and no wheels were available: SKIPPED [...] test_numpy_array.py:8: could not import 'numpy': No module named 'numpy' With this change, the full numpy test suite will run on Python 3.14, providing better test coverage for the newest Python version. Note: Using exact pin (==2.4.0) rather than compatible release (~=2.4.0) to ensure reproducible CI results with the first known-working version. --- tests/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/requirements.txt b/tests/requirements.txt index 41fc9f1439..3d61669234 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -10,6 +10,7 @@ numpy~=1.22.2; platform_python_implementation=="CPython" and python_version=="3. numpy~=1.26.0; platform_python_implementation=="CPython" and python_version>="3.11" and python_version<"3.13" and platform_machine!="ARM64" numpy~=2.3.0; platform_python_implementation=="CPython" and python_version>="3.11" and platform_machine=="ARM64" numpy~=2.2.0; platform_python_implementation=="CPython" and python_version=="3.13" and platform_machine!="ARM64" +numpy==2.4.0; platform_python_implementation=="CPython" and python_version>="3.14" pytest>=6 pytest-timeout scipy~=1.5.4; platform_python_implementation=="CPython" and python_version<"3.10" From 7148a4a79330b632912f9807bdeee24f09ae4d37 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 26 Dec 2025 22:05:45 -0800 Subject: [PATCH 20/35] tests: Add verbose flag to CIBW pytest command Add `-v` to the pytest command in tests/pyproject.toml to help diagnose hanging tests in CIBW jobs (particularly iOS). This will show each test name as it runs, making it easier to identify which specific test is hanging. --- tests/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pyproject.toml b/tests/pyproject.toml index fa478122d3..f1cf82e890 100644 --- a/tests/pyproject.toml +++ b/tests/pyproject.toml @@ -27,7 +27,7 @@ PYBIND11_FINDPYTHON = true [tool.cibuildwheel] test-sources = ["tests", "pyproject.toml"] -test-command = "python -m pytest -o timeout=0 -p no:cacheprovider tests" +test-command = "python -m pytest -v -o timeout=0 -p no:cacheprovider tests" environment.PIP_ONLY_BINARY = "numpy" environment.PIP_PREFER_BINARY = "1" From bc9c877c67023926a89f5efd465592ce74df1b51 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 26 Dec 2025 22:17:44 -0800 Subject: [PATCH 21/35] tests: Skip subinterpreter tests on iOS, add pytest timeout - Add `IOS` platform constant to `tests/env.py` for consistency with existing `ANDROID`, `LINUX`, `MACOS`, `WIN`, `FREEBSD` constants. - Skip `test_multiple_interpreters.py` module on iOS. Subinterpreters are not supported in the iOS simulator environment. These tests were previously skipped implicitly because the modules weren't installed in the wheel; now that they are (commit 6ed6d5a8), we need an explicit skip. - Change pytest timeout from 0 (disabled) to 120 seconds. This provides a safety net to catch hanging tests before the CI job times out after hours. Normal test runs complete in 33-55 seconds total (~1100 tests), so 120 seconds per test is very generous. - Add `-v` flag for verbose output to help diagnose any future issues. --- tests/env.py | 1 + tests/pyproject.toml | 2 +- tests/test_multiple_interpreters.py | 4 ++++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/env.py b/tests/env.py index 4b48e91930..ee932ad77a 100644 --- a/tests/env.py +++ b/tests/env.py @@ -5,6 +5,7 @@ import sysconfig ANDROID = sys.platform.startswith("android") +IOS = sys.platform.startswith("ios") LINUX = sys.platform.startswith("linux") MACOS = sys.platform.startswith("darwin") WIN = sys.platform.startswith("win32") or sys.platform.startswith("cygwin") diff --git a/tests/pyproject.toml b/tests/pyproject.toml index f1cf82e890..04db5cf264 100644 --- a/tests/pyproject.toml +++ b/tests/pyproject.toml @@ -27,7 +27,7 @@ PYBIND11_FINDPYTHON = true [tool.cibuildwheel] test-sources = ["tests", "pyproject.toml"] -test-command = "python -m pytest -v -o timeout=0 -p no:cacheprovider tests" +test-command = "python -m pytest -v -o timeout=120 -p no:cacheprovider tests" environment.PIP_ONLY_BINARY = "numpy" environment.PIP_PREFER_BINARY = "1" diff --git a/tests/test_multiple_interpreters.py b/tests/test_multiple_interpreters.py index aadf3d96bb..44877e772a 100644 --- a/tests/test_multiple_interpreters.py +++ b/tests/test_multiple_interpreters.py @@ -9,8 +9,12 @@ import pytest +import env import pybind11_tests +if env.IOS: + pytest.skip("Subinterpreters not supported on iOS", allow_module_level=True) + # 3.14.0b3+, though sys.implementation.supports_isolated_interpreters is being added in b4 # Can be simplified when we drop support for the first three betas CONCURRENT_INTERPRETERS_SUPPORT = ( From 849df0bccb07647479754e5213a506c89b4fa7c3 Mon Sep 17 00:00:00 2001 From: b-pass Date: Sat, 27 Dec 2025 15:40:21 -0500 Subject: [PATCH 22/35] More cleanups in argument vector, per comments. --- include/pybind11/detail/argument_vector.h | 48 +++++++++++------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/include/pybind11/detail/argument_vector.h b/include/pybind11/detail/argument_vector.h index 9afbabe013..89427c4ec0 100644 --- a/include/pybind11/detail/argument_vector.h +++ b/include/pybind11/detail/argument_vector.h @@ -165,25 +165,23 @@ struct small_vector { return m_repr.hvector.vec[idx]; } - void push_back(T x) { + void push_back(T const &x) { emplace_back(x); } + + template + void emplace_back(Args &&...x) { if (is_inline()) { auto &ha = m_repr.iarray; if (ha.size == InlineSize) { move_to_heap_vector_with_reserved_size(InlineSize + 1); - push_back_slow_path(std::move(x)); + m_repr.hvector.vec.emplace_back(std::forward(x)...); } else { - ha.arr[ha.size++] = std::move(x); + ha.arr[ha.size++] = T(std::forward(x)...); } } else { - push_back_slow_path(std::move(x)); + m_repr.hvector.vec.emplace_back(std::forward(x)...); } } - template - void emplace_back(Arg &&x) { - push_back(T(x)); - } - void reserve(std::size_t sz) { if (is_inline()) { if (sz > InlineSize) { @@ -194,19 +192,6 @@ struct small_vector { } } - void sort() { - if (is_inline()) { - if (m_repr.iarray.size < m_repr.iarray.arr.size()) { - std::sort(m_repr.iarray.arr.begin(), - m_repr.iarray.arr.begin() + m_repr.iarray.size); - } else { - std::sort(m_repr.iarray.arr.begin(), m_repr.iarray.arr.end()); - } - } else { - std::sort(m_repr.hvector.vec.begin(), m_repr.hvector.vec.end()); - } - } - private: using repr_type = inline_array_or_vector; repr_type m_repr; @@ -225,8 +210,6 @@ struct small_vector { new (&m_repr.hvector) heap_vector(std::move(hv)); } - PYBIND11_NOINLINE void push_back_slow_path(T x) { m_repr.hvector.vec.push_back(std::move(x)); } - PYBIND11_NOINLINE void reserve_slow_path(std::size_t sz) { m_repr.hvector.vec.reserve(sz); } bool is_inline() const { return m_repr.is_inline(); } @@ -303,6 +286,23 @@ struct small_vector { } } + void set(std::size_t idx, bool value = true) { + if (is_inline()) { + auto &ha = m_repr.iarray; + assert(ha.size < kInlineSize); + const auto wbi = word_and_bit_index(idx); + assert(wbi.word < kWords); + assert(wbi.bit < kBitsPerWord); + if (value) { + ha.arr[wbi.word] |= (static_cast(1) << wbi.bit); + } else { + ha.arr[wbi.word] &= ~(static_cast(1) << wbi.bit); + } + } else { + m_repr.hvector.vec[idx] = value; + } + } + void swap(small_vector &rhs) noexcept { std::swap(m_repr, rhs.m_repr); } private: From b517942b5dd42c317b40eac38946b358e997afa6 Mon Sep 17 00:00:00 2001 From: b-pass Date: Sat, 27 Dec 2025 15:41:28 -0500 Subject: [PATCH 23/35] Per Cursor, move all versions to Vectorcall since it has been supported since 3.8. This means getting rid of simple_collector, we can do the same with a constexpr if in the unpacking_collector. --- include/pybind11/cast.h | 276 ++++++++-------------------------------- 1 file changed, 55 insertions(+), 221 deletions(-) diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index 95d20d7587..5ad80fdd36 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -2191,216 +2191,56 @@ class argument_loader { std::tuple...> argcasters; }; -/// Helper class which collects only positional arguments for a Python function call. -/// A fancier version below can collect any argument, but this one is optimal for simple calls. -template -class simple_collector { -public: - template - explicit simple_collector(Ts &&...values) - : m_args(pybind11::make_tuple(std::forward(values)...)) {} - - const tuple &args() const & { return m_args; } - dict kwargs() const { return {}; } - - tuple args() && { return std::move(m_args); } - - /// Call a Python function and pass the collected arguments - object call(PyObject *ptr) const { - PyObject *result = PyObject_CallObject(ptr, m_args.ptr()); - if (!result) { - throw error_already_set(); - } - return reinterpret_steal(result); - } - -private: - tuple m_args; -}; - -/// Helper class which collects positional, keyword, * and ** arguments for a Python function call -template -class unpacking_collector { -public: - template - explicit unpacking_collector(Ts &&...values) { - // Tuples aren't (easily) resizable so a list is needed for collection, - // but the actual function call strictly requires a tuple. - auto args_list = list(); - using expander = int[]; - (void) expander{0, (process(args_list, std::forward(values)), 0)...}; - - m_args = std::move(args_list); - } - - const tuple &args() const & { return m_args; } - const dict &kwargs() const & { return m_kwargs; } - - tuple args() && { return std::move(m_args); } - dict kwargs() && { return std::move(m_kwargs); } - - /// Call a Python function and pass the collected arguments - object call(PyObject *ptr) const { - PyObject *result = PyObject_Call(ptr, m_args.ptr(), m_kwargs.ptr()); - if (!result) { - throw error_already_set(); - } - return reinterpret_steal(result); - } - -private: - template - void process(list &args_list, T &&x) { - auto o = reinterpret_steal( - detail::make_caster::cast(std::forward(x), policy, {})); - if (!o) { -#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) - throw cast_error_unable_to_convert_call_arg(std::to_string(args_list.size())); -#else - throw cast_error_unable_to_convert_call_arg(std::to_string(args_list.size()), - type_id()); -#endif - } - args_list.append(std::move(o)); - } - - void process(list &args_list, detail::args_proxy ap) { - for (auto a : ap) { - args_list.append(a); - } - } - - void process(list & /*args_list*/, arg_v a) { - if (!a.name) { -#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) - nameless_argument_error(); -#else - nameless_argument_error(a.type); -#endif - } - if (m_kwargs.contains(a.name)) { -#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) - multiple_values_error(); -#else - multiple_values_error(a.name); -#endif - } - if (!a.value) { -#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) - throw cast_error_unable_to_convert_call_arg(a.name); -#else - throw cast_error_unable_to_convert_call_arg(a.name, a.type); -#endif - } - m_kwargs[a.name] = std::move(a.value); - } - - void process(list & /*args_list*/, detail::kwargs_proxy kp) { - if (!kp) { - return; - } - for (auto k : reinterpret_borrow(kp)) { - if (m_kwargs.contains(k.first)) { -#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) - multiple_values_error(); -#else - multiple_values_error(str(k.first)); -#endif - } - m_kwargs[k.first] = k.second; - } - } - - [[noreturn]] static void nameless_argument_error() { - throw type_error( - "Got kwargs without a name; only named arguments " - "may be passed via py::arg() to a python function call. " - "(#define PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for details)"); - } - [[noreturn]] static void nameless_argument_error(const std::string &type) { - throw type_error("Got kwargs without a name of type '" + type - + "'; only named " - "arguments may be passed via py::arg() to a python function call. "); - } - [[noreturn]] static void multiple_values_error() { - throw type_error( - "Got multiple values for keyword argument " - "(#define PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for details)"); - } - - [[noreturn]] static void multiple_values_error(const std::string &name) { - throw type_error("Got multiple values for keyword argument '" + name + "'"); - } - -private: - tuple m_args; - dict m_kwargs; -}; - // [workaround(intel)] Separate function required here // We need to put this into a separate function because the Intel compiler // fails to compile enable_if_t...>::value> // (tested with ICC 2021.1 Beta 20200827). template -constexpr bool args_are_all_positional() { - return all_of...>::value; -} - -#if PY_VERSION_HEX < 0x030C0000 -/// Collect only positional arguments for a Python function call -template ()>> -simple_collector collect_arguments(Args &&...args) { - return simple_collector(std::forward(args)...); +constexpr bool args_has_keyword_or_ds() { + return any_of...>::value; } -/// Collect all arguments, including keywords and unpacking (only instantiated when needed) -template ()>> -unpacking_collector collect_arguments(Args &&...args) { - // Following argument order rules for generalized unpacking according to PEP 448 - static_assert(constexpr_last() - < constexpr_first() - && constexpr_last() - < constexpr_first(), - "Invalid function call: positional args must precede keywords and ** unpacking; " - "* unpacking must precede ** unpacking"); - return unpacking_collector(std::forward(args)...); -} -#else /// Helper class which collects positional, keyword, * and ** arguments for a Python function call template -class unpacking_vectorcall_collector { +class unpacking_collector { public: template - explicit unpacking_vectorcall_collector(Ts &&...values) { + explicit unpacking_collector(Ts &&...values) { + /* + Python can sometimes utilize an extra space before the arguments to add on `self`. + This is important enough that there is a special flag for it: + PY_VECTORCALL_ARGUMENTS_OFFSET. + All we have to do it allocate an extra space at the beginning of this array, and set the + flag. Note that the extra space is not passed directly in to vectorcall. + */ m_args.reserve(sizeof...(values) + 1); - m_args.push_back( - nullptr); // dummy first argument so we can use PY_VECTORCALL_ARGUMENTS_OFFSET + m_args.push_back(nullptr); - object names_list; // null or a list of names + if (args_has_keyword_or_ds()) { + object names_list = list(); - // collect_arguments guarantees this can't be constructed with kwargs before the last - // positional so we don't need to worry about Ts... being in anything but normal python - // order. - using expander = int[]; - (void) expander{0, (process(names_list, std::forward(values)), 0)...}; + // collect_arguments guarantees this can't be constructed with kwargs before the last + // positional so we don't need to worry about Ts... being in anything but normal python + // order. + using expander = int[]; + (void) expander{0, (process(names_list, std::forward(values)), 0)...}; - if (names_list) { m_names = reinterpret_steal(PyList_AsTuple(names_list.ptr())); + } else { + object not_used; + + using expander = int[]; + (void) expander{0, (process(not_used, std::forward(values)), 0)...}; } } /// Call a Python function and pass the collected arguments object call(PyObject *ptr) const { - // -1 to account for PY_VECTORCALL_ARGUMENTS_OFFSET - size_t nargs = m_args.size() - 1; + size_t nargs = m_args.size() - 1; // -1 for PY_VECTORCALL_ARGUMENTS_OFFSET (see ctor) if (m_names) { nargs -= static_cast(PyTuple_GET_SIZE(m_names.ptr())); } - PyObject *result = PyObject_Vectorcall( + PyObject *result = _PyObject_Vectorcall( ptr, m_args.data() + 1, nargs | PY_VECTORCALL_ARGUMENTS_OFFSET, m_names.ptr()); if (!result) { throw error_already_set(); @@ -2409,15 +2249,14 @@ class unpacking_vectorcall_collector { } tuple args() const { - // -1 to account for PY_VECTORCALL_ARGUMENTS_OFFSET - size_t nargs = m_args.size() - 1; + size_t nargs = m_args.size() - 1; // -1 for PY_VECTORCALL_ARGUMENTS_OFFSET (see ctor) if (m_names) { nargs -= static_cast(PyTuple_GET_SIZE(m_names.ptr())); } tuple val(nargs); for (size_t i = 0; i < nargs; ++i) { - // +1 to account for PY_VECTORCALL_ARGUMENTS_OFFSET - val[i] = reinterpret_borrow(m_args[i + 1]); + val[i] = reinterpret_borrow( + m_args[i + 1]); // +1 for PY_VECTORCALL_ARGUMENTS_OFFSET (see ctor) } return val; } @@ -2441,12 +2280,12 @@ class unpacking_vectorcall_collector { auto o = reinterpret_steal( detail::make_caster::cast(std::forward(x), policy, {})); if (!o) { -# if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) +#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size() - 1)); -# else +#else throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size() - 1), type_id()); -# endif +#endif } m_args.push_back(o.ptr()); m_temp.push_back(std::move(o)); @@ -2465,30 +2304,28 @@ class unpacking_vectorcall_collector { // named argument void process(object &names_list, arg_v a) { - if (!names_list) { - names_list = list(); - } + assert(names_list); if (!a.name) { -# if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) +#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) nameless_argument_error(); -# else +#else nameless_argument_error(a.type); -# endif +#endif } auto name = str(a.name); if (names_list.contains(name)) { -# if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) +#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) multiple_values_error(); -# else +#else multiple_values_error(a.name); -# endif +#endif } if (!a.value) { -# if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) +#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) throw cast_error_unable_to_convert_call_arg(a.name); -# else +#else throw cast_error_unable_to_convert_call_arg(a.name, a.type); -# endif +#endif } if (PyList_Append(names_list.ptr(), name.release().ptr()) < 0) { throw error_already_set(); @@ -2502,17 +2339,15 @@ class unpacking_vectorcall_collector { if (!kp) { return; } - if (!names_list) { - names_list = list(); - } + assert(names_list); for (auto &&k : reinterpret_borrow(kp)) { auto name = str(k.first); if (names_list.contains(name)) { -# if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) +#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) multiple_values_error(); -# else +#else multiple_values_error(name); -# endif +#endif } if (PyList_Append(names_list.ptr(), name.release().ptr()) < 0) { throw error_already_set(); @@ -2549,19 +2384,18 @@ class unpacking_vectorcall_collector { small_vector m_temp; }; -/// Collect all arguments, including keywords and unpacking for vectorcall +/// Collect all arguments, including keywords and unpacking template -unpacking_vectorcall_collector collect_arguments(Args &&...args) { +unpacking_collector collect_arguments(Args &&...args) { // Following argument order rules for generalized unpacking according to PEP 448 - static_assert(constexpr_last() - < constexpr_first() - && constexpr_last() - < constexpr_first(), - "Invalid function call: positional args must precede keywords and ** unpacking; " - "* unpacking must precede ** unpacking"); - return unpacking_vectorcall_collector(std::forward(args)...); + static_assert( + constexpr_last() < constexpr_first(), + "Invalid function call: positional args must precede keywords and */** unpacking;"); + static_assert(constexpr_last() + < constexpr_first(), + "Invalid function call: * unpacking must precede ** unpacking"); + return unpacking_collector(std::forward(args)...); } -#endif template template From b4ca36645b9cae28b32da464dd90c222de8982ae Mon Sep 17 00:00:00 2001 From: b-pass Date: Sat, 27 Dec 2025 15:43:08 -0500 Subject: [PATCH 24/35] Switch to a bool vec for the used_kwargs flag... This makes more sense and saves a sort, and the small_vector implementation means it will actually take less space than a vector of size_t elements. The most common case is that all kwargs are used. --- include/pybind11/pybind11.h | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index cea8d3d7b5..deece1ae1e 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -1007,10 +1007,9 @@ class cpp_function : public function { } // 2. Check kwargs and, failing that, defaults that may help complete the list - small_vector used_kwargs; - if (kwnames_in) { - used_kwargs.reserve(static_cast(PyTuple_GET_SIZE(kwnames_in))); - } + small_vector used_kwargs( + kwnames_in ? PyTuple_GET_SIZE(kwnames_in) : 0, false); + size_t used_kwargs_count = 0; if (args_copied < num_args) { for (; args_copied < num_args; ++args_copied) { const auto &arg_rec = func.args[args_copied]; @@ -1020,7 +1019,8 @@ class cpp_function : public function { ssize_t i = keyword_index(kwnames_in, arg_rec.name); if (i >= 0) { value = args_in_arr[n_args_in + static_cast(i)]; - used_kwargs.emplace_back(static_cast(i)); + used_kwargs.set(i, true); + used_kwargs_count++; } } @@ -1052,8 +1052,7 @@ class cpp_function : public function { } // 3. Check everything was consumed (unless we have a kwargs arg) - if (!func.has_kwargs && kwnames_in - && PyTuple_GET_SIZE(kwnames_in) > static_cast(used_kwargs.size())) { + if (!func.has_kwargs && used_kwargs_count < used_kwargs.size()) { continue; // Unconsumed kwargs, but no py::kwargs argument to accept them } @@ -1080,14 +1079,8 @@ class cpp_function : public function { // 4b. If we have a py::kwargs, pass on any remaining kwargs if (func.has_kwargs) { dict kwargs; - size_t used_i = 0; - size_t name_count - = kwnames_in ? static_cast(PyTuple_GET_SIZE(kwnames_in)) : 0; - used_kwargs.sort(); - for (size_t i = 0; i < name_count; ++i) { - if (used_i < used_kwargs.size() && used_kwargs[used_i] == i) { - ++used_i; - } else { + for (size_t i = 0; i < used_kwargs.size(); ++i) { + if (!used_kwargs[i]) { kwargs[PyTuple_GET_ITEM(kwnames_in, i)] = args_in_arr[n_args_in + i]; } } From c2e65f13b5c20b285b1d9f7333aedb7eff2df9de Mon Sep 17 00:00:00 2001 From: b-pass Date: Sat, 27 Dec 2025 17:26:27 -0500 Subject: [PATCH 25/35] Fix signedness for clang --- include/pybind11/pybind11.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index deece1ae1e..28603899b1 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -1019,7 +1019,7 @@ class cpp_function : public function { ssize_t i = keyword_index(kwnames_in, arg_rec.name); if (i >= 0) { value = args_in_arr[n_args_in + static_cast(i)]; - used_kwargs.set(i, true); + used_kwargs.set(static_cast(i), true); used_kwargs_count++; } } From 1526a85d7249c4fbc21e3cd6046444a928f90983 Mon Sep 17 00:00:00 2001 From: b-pass Date: Sat, 27 Dec 2025 17:38:55 -0500 Subject: [PATCH 26/35] Another signedness issue --- include/pybind11/pybind11.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index 28603899b1..45e8e46f89 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -1008,7 +1008,7 @@ class cpp_function : public function { // 2. Check kwargs and, failing that, defaults that may help complete the list small_vector used_kwargs( - kwnames_in ? PyTuple_GET_SIZE(kwnames_in) : 0, false); + kwnames_in ? static_cast(PyTuple_GET_SIZE(kwnames_in)) : 0, false); size_t used_kwargs_count = 0; if (args_copied < num_args) { for (; args_copied < num_args; ++args_copied) { From a926160779aaa4bec002cbdb80cab85d1bb1c68a Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sat, 27 Dec 2025 20:36:04 -0800 Subject: [PATCH 27/35] tests: Disable pytest-timeout for Pyodide (no signal.setitimer) Pyodide runs in a WebAssembly sandbox without POSIX signals, so `signal.setitimer` is not available. This causes pytest-timeout to crash with `AttributeError: module 'signal' has no attribute 'setitimer'` when timeout > 0. Override the test-command for Pyodide to keep timeout=0 (disabled). --- tests/pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/pyproject.toml b/tests/pyproject.toml index 04db5cf264..8821ea3f34 100644 --- a/tests/pyproject.toml +++ b/tests/pyproject.toml @@ -28,6 +28,8 @@ PYBIND11_FINDPYTHON = true [tool.cibuildwheel] test-sources = ["tests", "pyproject.toml"] test-command = "python -m pytest -v -o timeout=120 -p no:cacheprovider tests" +# Pyodide doesn't have signal.setitimer, so pytest-timeout can't work with timeout > 0 +pyodide.test-command = "python -m pytest -v -o timeout=0 -p no:cacheprovider tests" environment.PIP_ONLY_BINARY = "numpy" environment.PIP_PREFER_BINARY = "1" From c163bf2a3112196516803da25e8f25e8ca2b86a1 Mon Sep 17 00:00:00 2001 From: b-pass Date: Sat, 27 Dec 2025 18:53:01 -0500 Subject: [PATCH 28/35] Combine temp storage and args into one vector It's a good bit faster at the cost of this one scary reinterpret_cast. --- include/pybind11/cast.h | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index 5ad80fdd36..0663e54815 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -2214,7 +2214,7 @@ class unpacking_collector { flag. Note that the extra space is not passed directly in to vectorcall. */ m_args.reserve(sizeof...(values) + 1); - m_args.push_back(nullptr); + m_args.emplace_back(); if (args_has_keyword_or_ds()) { object names_list = list(); @@ -2240,8 +2240,13 @@ class unpacking_collector { if (m_names) { nargs -= static_cast(PyTuple_GET_SIZE(m_names.ptr())); } - PyObject *result = _PyObject_Vectorcall( - ptr, m_args.data() + 1, nargs | PY_VECTORCALL_ARGUMENTS_OFFSET, m_names.ptr()); + static_assert(sizeof(object) == sizeof(PyObject *), + "this cast requires objects to be wrapped pointers"); + PyObject *result + = _PyObject_Vectorcall(ptr, + reinterpret_cast(m_args.data() + 1), + nargs | PY_VECTORCALL_ARGUMENTS_OFFSET, + m_names.ptr()); if (!result) { throw error_already_set(); } @@ -2255,8 +2260,7 @@ class unpacking_collector { } tuple val(nargs); for (size_t i = 0; i < nargs; ++i) { - val[i] = reinterpret_borrow( - m_args[i + 1]); // +1 for PY_VECTORCALL_ARGUMENTS_OFFSET (see ctor) + val[i] = m_args[i + 1]; // +1 for PY_VECTORCALL_ARGUMENTS_OFFSET (see ctor) } return val; } @@ -2267,7 +2271,7 @@ class unpacking_collector { auto namestup = reinterpret_borrow(m_names); size_t offset = m_args.size() - namestup.size(); for (size_t i = 0; i < namestup.size(); ++i, ++offset) { - val[namestup[i]] = reinterpret_borrow(m_args[offset]); + val[namestup[i]] = m_args[offset]; } } return val; @@ -2287,8 +2291,7 @@ class unpacking_collector { type_id()); #endif } - m_args.push_back(o.ptr()); - m_temp.push_back(std::move(o)); + m_args.emplace_back(std::move(o)); } // * unpacking @@ -2297,8 +2300,7 @@ class unpacking_collector { return; } for (auto a : ap) { - m_temp.push_back(reinterpret_borrow(a)); // keep alive - m_args.push_back(a.ptr()); + m_args.emplace_back(reinterpret_borrow(a)); // keep alive } } @@ -2330,8 +2332,7 @@ class unpacking_collector { if (PyList_Append(names_list.ptr(), name.release().ptr()) < 0) { throw error_already_set(); } - m_temp.push_back(a.value); // keep alive - m_args.push_back(a.value.ptr()); + m_args.emplace_back(a.value); } // ** unpacking @@ -2352,8 +2353,7 @@ class unpacking_collector { if (PyList_Append(names_list.ptr(), name.release().ptr()) < 0) { throw error_already_set(); } - m_temp.push_back(reinterpret_borrow(k.second)); // keep alive - m_args.push_back(k.second.ptr()); + m_args.emplace_back(reinterpret_borrow(k.second)); // keep alive } } @@ -2379,9 +2379,8 @@ class unpacking_collector { } private: - small_vector m_args; + small_vector m_args; object m_names; // null or a tuple of names - small_vector m_temp; }; /// Collect all arguments, including keywords and unpacking From 34fd03b08b953046272a357d0366b1dd28cce9b7 Mon Sep 17 00:00:00 2001 From: b-pass Date: Sat, 27 Dec 2025 18:55:05 -0500 Subject: [PATCH 29/35] Phrasing --- include/pybind11/cast.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index 0663e54815..c366ee6b98 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -2207,10 +2207,10 @@ class unpacking_collector { template explicit unpacking_collector(Ts &&...values) { /* - Python can sometimes utilize an extra space before the arguments to add on `self`. + Python can sometimes utilize an extra space before the arguments to prepend `self`. This is important enough that there is a special flag for it: PY_VECTORCALL_ARGUMENTS_OFFSET. - All we have to do it allocate an extra space at the beginning of this array, and set the + All we have to do is allocate an extra space at the beginning of this array, and set the flag. Note that the extra space is not passed directly in to vectorcall. */ m_args.reserve(sizeof...(values) + 1); From 1d539c2b36c75cad7d5da4e2b8d75b5b03972418 Mon Sep 17 00:00:00 2001 From: b-pass Date: Sun, 28 Dec 2025 13:43:33 -0500 Subject: [PATCH 30/35] Delete incorrect comment At 6, the struct is 144 bytes (not 128 bytes as the comment said). --- include/pybind11/cast.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index c366ee6b98..e1e7481d9e 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -2078,8 +2078,7 @@ using is_pos_only = std::is_same, pos_only>; // forward declaration (definition in attr.h) struct function_record; -/// (Inline size chosen mostly arbitrarily; 6 should pad function_call out to two cache lines -/// (16 pointers) in size.) +/// Inline size chosen mostly arbitrarily. constexpr std::size_t arg_vector_small_size = 6; /// Internal data associated with a single function call From eea6651b31d1724333711a03481203262bef4bf2 Mon Sep 17 00:00:00 2001 From: b-pass Date: Sun, 28 Dec 2025 13:44:13 -0500 Subject: [PATCH 31/35] Fix push_back --- include/pybind11/detail/argument_vector.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/pybind11/detail/argument_vector.h b/include/pybind11/detail/argument_vector.h index 89427c4ec0..6357a9a042 100644 --- a/include/pybind11/detail/argument_vector.h +++ b/include/pybind11/detail/argument_vector.h @@ -165,7 +165,7 @@ struct small_vector { return m_repr.hvector.vec[idx]; } - void push_back(T const &x) { emplace_back(x); } + void push_back(T x) { emplace_back(std::move(x)); } template void emplace_back(Args &&...x) { @@ -202,7 +202,7 @@ struct small_vector { using heap_vector = typename repr_type::heap_vector; heap_vector hv; hv.vec.reserve(reserved_size); - static_assert(std::is_nothrow_move_assignable::value, + static_assert(std::is_nothrow_move_constructible::value, "this conversion is not exception safe"); static_assert(std::is_nothrow_move_constructible::value, "this conversion is not exception safe"); From 5059cea1f64e179c058ef6f3d8121531759763c1 Mon Sep 17 00:00:00 2001 From: b-pass Date: Sun, 28 Dec 2025 18:04:12 -0500 Subject: [PATCH 32/35] Update push_back in argument_vector.h Co-authored-by: Aaron Gokaslan --- include/pybind11/detail/argument_vector.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/pybind11/detail/argument_vector.h b/include/pybind11/detail/argument_vector.h index 6357a9a042..00e0b202f3 100644 --- a/include/pybind11/detail/argument_vector.h +++ b/include/pybind11/detail/argument_vector.h @@ -165,7 +165,9 @@ struct small_vector { return m_repr.hvector.vec[idx]; } - void push_back(T x) { emplace_back(std::move(x)); } + void push_back(const T& x) { emplace_back(x); } + + void push_back(T&& x) { emplace_back(std::move(x)); } template void emplace_back(Args &&...x) { From 8ad086d21f22006166c5e24b9c65747f9492accd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 28 Dec 2025 23:04:35 +0000 Subject: [PATCH 33/35] style: pre-commit fixes --- include/pybind11/detail/argument_vector.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/pybind11/detail/argument_vector.h b/include/pybind11/detail/argument_vector.h index 00e0b202f3..8c9084cd9c 100644 --- a/include/pybind11/detail/argument_vector.h +++ b/include/pybind11/detail/argument_vector.h @@ -165,9 +165,9 @@ struct small_vector { return m_repr.hvector.vec[idx]; } - void push_back(const T& x) { emplace_back(x); } - - void push_back(T&& x) { emplace_back(std::move(x)); } + void push_back(const T &x) { emplace_back(x); } + + void push_back(T &&x) { emplace_back(std::move(x)); } template void emplace_back(Args &&...x) { From a78a398bf21c87764ae3fe1c3c9ffe7388a7b169 Mon Sep 17 00:00:00 2001 From: b-pass Date: Sun, 28 Dec 2025 18:05:19 -0500 Subject: [PATCH 34/35] Use real types for these instead of object They can be null if you "steal" a null handle. --- include/pybind11/cast.h | 46 ++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index e1e7481d9e..7b63618dc1 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -2204,7 +2204,10 @@ template class unpacking_collector { public: template - explicit unpacking_collector(Ts &&...values) { + explicit unpacking_collector(Ts &&...values) + : m_names(reinterpret_steal( + handle())) // initialize to null to avoid useless allocation of 0-length tuple + { /* Python can sometimes utilize an extra space before the arguments to prepend `self`. This is important enough that there is a special flag for it: @@ -2216,7 +2219,7 @@ class unpacking_collector { m_args.emplace_back(); if (args_has_keyword_or_ds()) { - object names_list = list(); + list names_list; // collect_arguments guarantees this can't be constructed with kwargs before the last // positional so we don't need to worry about Ts... being in anything but normal python @@ -2226,7 +2229,8 @@ class unpacking_collector { m_names = reinterpret_steal(PyList_AsTuple(names_list.ptr())); } else { - object not_used; + auto not_used + = reinterpret_steal(handle()); // initialize as null (to avoid an allocation) using expander = int[]; (void) expander{0, (process(not_used, std::forward(values)), 0)...}; @@ -2237,10 +2241,10 @@ class unpacking_collector { object call(PyObject *ptr) const { size_t nargs = m_args.size() - 1; // -1 for PY_VECTORCALL_ARGUMENTS_OFFSET (see ctor) if (m_names) { - nargs -= static_cast(PyTuple_GET_SIZE(m_names.ptr())); + nargs -= m_names.size(); } static_assert(sizeof(object) == sizeof(PyObject *), - "this cast requires objects to be wrapped pointers"); + "this cast requires object to be interpreted as PyObject*"); PyObject *result = _PyObject_Vectorcall(ptr, reinterpret_cast(m_args.data() + 1), @@ -2255,7 +2259,7 @@ class unpacking_collector { tuple args() const { size_t nargs = m_args.size() - 1; // -1 for PY_VECTORCALL_ARGUMENTS_OFFSET (see ctor) if (m_names) { - nargs -= static_cast(PyTuple_GET_SIZE(m_names.ptr())); + nargs -= m_names.size(); } tuple val(nargs); for (size_t i = 0; i < nargs; ++i) { @@ -2267,10 +2271,9 @@ class unpacking_collector { dict kwargs() const { dict val; if (m_names) { - auto namestup = reinterpret_borrow(m_names); - size_t offset = m_args.size() - namestup.size(); - for (size_t i = 0; i < namestup.size(); ++i, ++offset) { - val[namestup[i]] = m_args[offset]; + size_t offset = m_args.size() - m_names.size(); + for (size_t i = 0; i < m_names.size(); ++i, ++offset) { + val[m_names[i]] = m_args[offset]; } } return val; @@ -2279,7 +2282,7 @@ class unpacking_collector { private: // normal argument, possibly needing conversion template - void process(object & /*names_list*/, T &&x) { + void process(list & /*names_list*/, T &&x) { auto o = reinterpret_steal( detail::make_caster::cast(std::forward(x), policy, {})); if (!o) { @@ -2294,17 +2297,18 @@ class unpacking_collector { } // * unpacking - void process(object & /*names_list*/, detail::args_proxy ap) { + void process(list & /*names_list*/, detail::args_proxy ap) { if (!ap) { return; } for (auto a : ap) { - m_args.emplace_back(reinterpret_borrow(a)); // keep alive + m_args.emplace_back(reinterpret_borrow(a)); } } // named argument - void process(object &names_list, arg_v a) { + // NOLINTNEXTLINE(performance-unnecessary-value-param) + void process(list &names_list, arg_v a) { assert(names_list); if (!a.name) { #if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) @@ -2328,14 +2332,12 @@ class unpacking_collector { throw cast_error_unable_to_convert_call_arg(a.name, a.type); #endif } - if (PyList_Append(names_list.ptr(), name.release().ptr()) < 0) { - throw error_already_set(); - } + names_list.append(std::move(name)); m_args.emplace_back(a.value); } // ** unpacking - void process(object &names_list, detail::kwargs_proxy kp) { + void process(list &names_list, detail::kwargs_proxy kp) { if (!kp) { return; } @@ -2349,10 +2351,8 @@ class unpacking_collector { multiple_values_error(name); #endif } - if (PyList_Append(names_list.ptr(), name.release().ptr()) < 0) { - throw error_already_set(); - } - m_args.emplace_back(reinterpret_borrow(k.second)); // keep alive + names_list.append(std::move(name)); + m_args.emplace_back(reinterpret_borrow(k.second)); } } @@ -2379,7 +2379,7 @@ class unpacking_collector { private: small_vector m_args; - object m_names; // null or a tuple of names + tuple m_names; }; /// Collect all arguments, including keywords and unpacking From d828e7dfd2b0cd676becf99e1d53853a6d0322ef Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sun, 28 Dec 2025 18:11:10 -0800 Subject: [PATCH 35/35] refactor: Replace small_vector with ref_small_vector for explicit ownership Introduce `ref_small_vector` to manage PyObject* references in `unpacking_collector`, replacing the previous `small_vector` approach. Primary goals: 1. **Maintainability**: The previous implementation relied on `sizeof(object) == sizeof(PyObject*)` and used a reinterpret_cast to pass the object array to vectorcall. This coupling to py::object's internal layout could break if someone adds a debug field or other member to py::handle/py::object in the future. 2. **Readability**: The new `push_back_steal()` vs `push_back_borrow()` API makes reference counting intent explicit at each call site, rather than relying on implicit py::object semantics. 3. **Intuitive code**: Storing `PyObject*` directly and passing it to `_PyObject_Vectorcall` without casts is straightforward and matches what the C API expects. No "scary" reinterpret_cast needed. Additional benefits: - `PyObject*` is trivially copyable, simplifying vector operations - Batch decref in destructor (tight loop vs N individual object destructors) - Self-documenting ownership semantics Design consideration: We considered folding the ref-counting functionality directly into `small_vector` via template specialization for `PyObject*`. We decided against this because: - It would give `small_vector` a different interface than the generic `small_vector` (steal/borrow vs push_back) - Someone might want a non-ref-counting `small_vector` - The specialization behavior could surprise users expecting uniform semantics A separate `ref_small_vector` type makes the ref-counting behavior explicit and self-documenting, while keeping `small_vector` generic and predictable. --- include/pybind11/cast.h | 31 +++++------- include/pybind11/detail/argument_vector.h | 58 +++++++++++++++++++++++ 2 files changed, 71 insertions(+), 18 deletions(-) diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index 7b63618dc1..f5a94da206 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -2216,7 +2216,7 @@ class unpacking_collector { flag. Note that the extra space is not passed directly in to vectorcall. */ m_args.reserve(sizeof...(values) + 1); - m_args.emplace_back(); + m_args.push_back_null(); if (args_has_keyword_or_ds()) { list names_list; @@ -2243,13 +2243,8 @@ class unpacking_collector { if (m_names) { nargs -= m_names.size(); } - static_assert(sizeof(object) == sizeof(PyObject *), - "this cast requires object to be interpreted as PyObject*"); - PyObject *result - = _PyObject_Vectorcall(ptr, - reinterpret_cast(m_args.data() + 1), - nargs | PY_VECTORCALL_ARGUMENTS_OFFSET, - m_names.ptr()); + PyObject *result = _PyObject_Vectorcall( + ptr, m_args.data() + 1, nargs | PY_VECTORCALL_ARGUMENTS_OFFSET, m_names.ptr()); if (!result) { throw error_already_set(); } @@ -2263,7 +2258,8 @@ class unpacking_collector { } tuple val(nargs); for (size_t i = 0; i < nargs; ++i) { - val[i] = m_args[i + 1]; // +1 for PY_VECTORCALL_ARGUMENTS_OFFSET (see ctor) + // +1 for PY_VECTORCALL_ARGUMENTS_OFFSET (see ctor) + val[i] = reinterpret_borrow(m_args[i + 1]); } return val; } @@ -2273,7 +2269,7 @@ class unpacking_collector { if (m_names) { size_t offset = m_args.size() - m_names.size(); for (size_t i = 0; i < m_names.size(); ++i, ++offset) { - val[m_names[i]] = m_args[offset]; + val[m_names[i]] = reinterpret_borrow(m_args[offset]); } } return val; @@ -2283,9 +2279,8 @@ class unpacking_collector { // normal argument, possibly needing conversion template void process(list & /*names_list*/, T &&x) { - auto o = reinterpret_steal( - detail::make_caster::cast(std::forward(x), policy, {})); - if (!o) { + handle h = detail::make_caster::cast(std::forward(x), policy, {}); + if (!h) { #if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size() - 1)); #else @@ -2293,7 +2288,7 @@ class unpacking_collector { type_id()); #endif } - m_args.emplace_back(std::move(o)); + m_args.push_back_steal(h.ptr()); // cast returns a new reference } // * unpacking @@ -2302,7 +2297,7 @@ class unpacking_collector { return; } for (auto a : ap) { - m_args.emplace_back(reinterpret_borrow(a)); + m_args.push_back_borrow(a.ptr()); } } @@ -2333,7 +2328,7 @@ class unpacking_collector { #endif } names_list.append(std::move(name)); - m_args.emplace_back(a.value); + m_args.push_back_borrow(a.value.ptr()); } // ** unpacking @@ -2352,7 +2347,7 @@ class unpacking_collector { #endif } names_list.append(std::move(name)); - m_args.emplace_back(reinterpret_borrow(k.second)); + m_args.push_back_borrow(k.second.ptr()); } } @@ -2378,7 +2373,7 @@ class unpacking_collector { } private: - small_vector m_args; + ref_small_vector m_args; tuple m_names; }; diff --git a/include/pybind11/detail/argument_vector.h b/include/pybind11/detail/argument_vector.h index 8c9084cd9c..6e2c2ec481 100644 --- a/include/pybind11/detail/argument_vector.h +++ b/include/pybind11/detail/argument_vector.h @@ -355,5 +355,63 @@ using argument_vector = small_vector; template using args_convert_vector = small_vector; +/// A small_vector of PyObject* that holds references and releases them on destruction. +/// This provides explicit ownership semantics without relying on py::object's +/// destructor, and avoids the need for reinterpret_cast when passing to vectorcall. +template +class ref_small_vector { +public: + ref_small_vector() = default; + + ~ref_small_vector() { + for (std::size_t i = 0; i < m_ptrs.size(); ++i) { + Py_XDECREF(m_ptrs[i]); + } + } + + // Disable copy (prevent accidental double-decref) + ref_small_vector(const ref_small_vector &) = delete; + ref_small_vector &operator=(const ref_small_vector &) = delete; + + // Move is allowed + ref_small_vector(ref_small_vector &&other) noexcept : m_ptrs(std::move(other.m_ptrs)) { + // other.m_ptrs is now empty, so its destructor won't decref anything + } + + ref_small_vector &operator=(ref_small_vector &&other) noexcept { + if (this != &other) { + // Decref our current contents + for (std::size_t i = 0; i < m_ptrs.size(); ++i) { + Py_XDECREF(m_ptrs[i]); + } + m_ptrs = std::move(other.m_ptrs); + } + return *this; + } + + /// Add a pointer, taking ownership (no incref, will decref on destruction) + void push_back_steal(PyObject *p) { m_ptrs.push_back(p); } + + /// Add a pointer, borrowing (increfs now, will decref on destruction) + void push_back_borrow(PyObject *p) { + Py_XINCREF(p); + m_ptrs.push_back(p); + } + + /// Add a null pointer (for PY_VECTORCALL_ARGUMENTS_OFFSET slot) + void push_back_null() { m_ptrs.push_back(nullptr); } + + void reserve(std::size_t sz) { m_ptrs.reserve(sz); } + + std::size_t size() const { return m_ptrs.size(); } + + PyObject *operator[](std::size_t idx) const { return m_ptrs[idx]; } + + PyObject *const *data() const { return m_ptrs.data(); } + +private: + small_vector m_ptrs; +}; + PYBIND11_NAMESPACE_END(detail) PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)