Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
ef662f6
Make argument_vector re-usable for other types.
b-pass Dec 24, 2025
418a034
Attempt to collect args into array for vectorcall
b-pass Dec 24, 2025
fc22c97
Revert "Attempt to collect args into array for vectorcall"
b-pass Dec 25, 2025
9894a5a
Implement vectorcall args collector
b-pass Dec 25, 2025
73ee046
pre-commit fixes
b-pass Dec 25, 2025
4615f8a
Checkpoint in moving to METH_FASTCALL
b-pass Dec 25, 2025
1dc7b3f
pre-commit fixes
b-pass Dec 25, 2025
893df5d
Use the names tuple directly, cleaner code and less reference counting
b-pass Dec 26, 2025
655db8a
Fix unit test, the code now holds more references
b-pass Dec 26, 2025
c43b156
Make clangtidy happy
b-pass Dec 26, 2025
d881a7b
Oops, _v is C++14
b-pass Dec 26, 2025
76831ec
style: pre-commit fixes
pre-commit-ci[bot] Dec 26, 2025
0285818
Minor code cleanup
b-pass Dec 26, 2025
cc2b5a9
Fix signed conversions
b-pass Dec 26, 2025
25485b3
Fix args expansion
b-pass Dec 26, 2025
6c6bd71
style: pre-commit fixes
pre-commit-ci[bot] Dec 26, 2025
afd6299
Code cleanup
b-pass Dec 26, 2025
b8ba02d
Merge branch 'master' into b-pass→vectorcall
rwgk Dec 27, 2025
6ed6d5a
fix(tests): Install multiple-interpreter test modules into wheel
rwgk Dec 27, 2025
ca72d6c
tests: Pin numpy 2.4.0 for Python 3.14 CI tests
rwgk Dec 27, 2025
7148a4a
tests: Add verbose flag to CIBW pytest command
rwgk Dec 27, 2025
bc9c877
tests: Skip subinterpreter tests on iOS, add pytest timeout
rwgk Dec 27, 2025
849df0b
More cleanups in argument vector, per comments.
b-pass Dec 27, 2025
b517942
Per Cursor, move all versions to Vectorcall since it has been support…
b-pass Dec 27, 2025
b4ca366
Switch to a bool vec for the used_kwargs flag...
b-pass Dec 27, 2025
c2e65f1
Fix signedness for clang
b-pass Dec 27, 2025
1526a85
Another signedness issue
b-pass Dec 27, 2025
a926160
tests: Disable pytest-timeout for Pyodide (no signal.setitimer)
rwgk Dec 28, 2025
c163bf2
Combine temp storage and args into one vector
b-pass Dec 27, 2025
34fd03b
Phrasing
b-pass Dec 27, 2025
1d539c2
Delete incorrect comment
b-pass Dec 28, 2025
eea6651
Fix push_back
b-pass Dec 28, 2025
5059cea
Update push_back in argument_vector.h
b-pass Dec 28, 2025
8ad086d
style: pre-commit fixes
pre-commit-ci[bot] Dec 28, 2025
a78a398
Use real types for these instead of object
b-pass Dec 28, 2025
d828e7d
refactor: Replace small_vector<object> with ref_small_vector for expl…
rwgk Dec 29, 2025
e09c222
Merge branch 'master' into b-pass→vectorcall
rwgk Dec 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
195 changes: 108 additions & 87 deletions include/pybind11/cast.h
Original file line number Diff line number Diff line change
Expand Up @@ -2078,8 +2078,7 @@ using is_pos_only = std::is_same<intrinsic_t<T>, pos_only>;
// forward declaration (definition in attr.h)
struct function_record;

/// (Inline size chosen mostly arbitrarily; 6 should pad function_call out to two cache lines
/// (16 pointers) in size.)
/// Inline size chosen mostly arbitrarily.
constexpr std::size_t arg_vector_small_size = 6;

/// Internal data associated with a single function call
Expand Down Expand Up @@ -2191,94 +2190,130 @@ class argument_loader {
std::tuple<make_caster<Args>...> argcasters;
};

/// Helper class which collects only positional arguments for a Python function call.
/// A fancier version below can collect any argument, but this one is optimal for simple calls.
// [workaround(intel)] Separate function required here
// We need to put this into a separate function because the Intel compiler
// fails to compile enable_if_t<!all_of<is_positional<Args>...>::value>
// (tested with ICC 2021.1 Beta 20200827).
template <typename... Args>
constexpr bool args_has_keyword_or_ds() {
return any_of<is_keyword_or_ds<Args>...>::value;
}

/// Helper class which collects positional, keyword, * and ** arguments for a Python function call
template <return_value_policy policy>
class simple_collector {
class unpacking_collector {
public:
template <typename... Ts>
explicit simple_collector(Ts &&...values)
: m_args(pybind11::make_tuple<policy>(std::forward<Ts>(values)...)) {}

const tuple &args() const & { return m_args; }
dict kwargs() const { return {}; }
explicit unpacking_collector(Ts &&...values)
: m_names(reinterpret_steal<tuple>(
handle())) // initialize to null to avoid useless allocation of 0-length tuple
{
/*
Python can sometimes utilize an extra space before the arguments to prepend `self`.
This is important enough that there is a special flag for it:
PY_VECTORCALL_ARGUMENTS_OFFSET.
All we have to do is allocate an extra space at the beginning of this array, and set the
flag. Note that the extra space is not passed directly in to vectorcall.
*/
m_args.reserve(sizeof...(values) + 1);
m_args.push_back_null();

if (args_has_keyword_or_ds<Ts...>()) {
list names_list;

// collect_arguments guarantees this can't be constructed with kwargs before the last
// positional so we don't need to worry about Ts... being in anything but normal python
// order.
using expander = int[];
(void) expander{0, (process(names_list, std::forward<Ts>(values)), 0)...};

m_names = reinterpret_steal<tuple>(PyList_AsTuple(names_list.ptr()));
} else {
auto not_used
= reinterpret_steal<list>(handle()); // initialize as null (to avoid an allocation)

tuple args() && { return std::move(m_args); }
using expander = int[];
(void) expander{0, (process(not_used, std::forward<Ts>(values)), 0)...};
}
}

/// Call a Python function and pass the collected arguments
object call(PyObject *ptr) const {
PyObject *result = PyObject_CallObject(ptr, m_args.ptr());
size_t nargs = m_args.size() - 1; // -1 for PY_VECTORCALL_ARGUMENTS_OFFSET (see ctor)
if (m_names) {
nargs -= m_names.size();
}
PyObject *result = _PyObject_Vectorcall(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Slight nit could have been a py::handle

ptr, m_args.data() + 1, nargs | PY_VECTORCALL_ARGUMENTS_OFFSET, m_names.ptr());
if (!result) {
throw error_already_set();
}
return reinterpret_steal<object>(result);
}

private:
tuple m_args;
};

/// Helper class which collects positional, keyword, * and ** arguments for a Python function call
template <return_value_policy policy>
class unpacking_collector {
public:
template <typename... Ts>
explicit unpacking_collector(Ts &&...values) {
// Tuples aren't (easily) resizable so a list is needed for collection,
// but the actual function call strictly requires a tuple.
auto args_list = list();
using expander = int[];
(void) expander{0, (process(args_list, std::forward<Ts>(values)), 0)...};

m_args = std::move(args_list);
tuple args() const {
size_t nargs = m_args.size() - 1; // -1 for PY_VECTORCALL_ARGUMENTS_OFFSET (see ctor)
if (m_names) {
nargs -= m_names.size();
}
tuple val(nargs);
for (size_t i = 0; i < nargs; ++i) {
// +1 for PY_VECTORCALL_ARGUMENTS_OFFSET (see ctor)
val[i] = reinterpret_borrow<object>(m_args[i + 1]);
}
return val;
}

const tuple &args() const & { return m_args; }
const dict &kwargs() const & { return m_kwargs; }

tuple args() && { return std::move(m_args); }
dict kwargs() && { return std::move(m_kwargs); }

/// Call a Python function and pass the collected arguments
object call(PyObject *ptr) const {
PyObject *result = PyObject_Call(ptr, m_args.ptr(), m_kwargs.ptr());
if (!result) {
throw error_already_set();
dict kwargs() const {
dict val;
if (m_names) {
size_t offset = m_args.size() - m_names.size();
for (size_t i = 0; i < m_names.size(); ++i, ++offset) {
val[m_names[i]] = reinterpret_borrow<object>(m_args[offset]);
}
}
return reinterpret_steal<object>(result);
return val;
}

private:
// normal argument, possibly needing conversion
template <typename T>
void process(list &args_list, T &&x) {
auto o = reinterpret_steal<object>(
detail::make_caster<T>::cast(std::forward<T>(x), policy, {}));
if (!o) {
void process(list & /*names_list*/, T &&x) {
handle h = detail::make_caster<T>::cast(std::forward<T>(x), policy, {});
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Converting this to a handle here makes me a bit uncomfortable. You could keep it in an object and it just use the move from ctor, the only downside is you'll have one extra call to PYXDECREF on a nullptr.

if (!h) {
#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES)
throw cast_error_unable_to_convert_call_arg(std::to_string(args_list.size()));
throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size() - 1));
#else
throw cast_error_unable_to_convert_call_arg(std::to_string(args_list.size()),
throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size() - 1),
type_id<T>());
#endif
}
args_list.append(std::move(o));
m_args.push_back_steal(h.ptr()); // cast returns a new reference
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
m_args.push_back_steal(h.ptr()); // cast returns a new reference
m_args.push_back_steal(std::move(o)); // cast returns a new reference

Then it can keep the good old object semantics.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These can be normal objects that you just release. The borrow semantics are a bit more ackward unless you use move by value implicit construction. This might actually be cleaner.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I explored what this change would look like. Here's the diff (Cursor-generated):

diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h
--- a/include/pybind11/cast.h
+++ b/include/pybind11/cast.h
@@ -2279,8 +2279,9 @@ private:
     // normal argument, possibly needing conversion
     template <typename T>
     void process(list & /*names_list*/, T &&x) {
-        handle h = detail::make_caster<T>::cast(std::forward<T>(x), policy, {});
-        if (!h) {
+        auto o = reinterpret_steal<object>(
+            detail::make_caster<T>::cast(std::forward<T>(x), policy, {}));
+        if (!o) {
 #if !defined(PYBIND11_DETAILED_ERROR_MESSAGES)
             throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size() - 1));
 #else
@@ -2288,7 +2289,7 @@ private:
                                                         type_id<T>());
 #endif
         }
-        m_args.push_back_steal(h.ptr()); // cast returns a new reference
+        m_args.push_back_steal(std::move(o));
     }

     // * unpacking
@@ -2297,7 +2298,7 @@ private:
             return;
         }
         for (auto a : ap) {
-            m_args.push_back_borrow(a.ptr());
+            m_args.push_back_borrow(a);
         }
     }

@@ -2328,7 +2329,7 @@ private:
 #endif
         }
         names_list.append(std::move(name));
-        m_args.push_back_borrow(a.value.ptr());
+        m_args.push_back_borrow(a.value);
     }

     // ** unpacking
@@ -2347,7 +2348,7 @@ private:
 #endif
             }
             names_list.append(std::move(name));
-            m_args.push_back_borrow(k.second.ptr());
+            m_args.push_back_borrow(k.second);
         }
     }

diff --git a/include/pybind11/detail/argument_vector.h b/include/pybind11/detail/argument_vector.h
--- a/include/pybind11/detail/argument_vector.h
+++ b/include/pybind11/detail/argument_vector.h
@@ -389,13 +389,13 @@ public:
         return *this;
     }

-    /// Add a pointer, taking ownership (no incref, will decref on destruction)
-    void push_back_steal(PyObject *p) { m_ptrs.push_back(p); }
+    /// Add an object, taking ownership (will decref on destruction)
+    void push_back_steal(object &&o) { m_ptrs.push_back(o.release().ptr()); }

-    /// Add a pointer, borrowing (increfs now, will decref on destruction)
-    void push_back_borrow(PyObject *p) {
-        Py_XINCREF(p);
-        m_ptrs.push_back(p);
+    /// Add a handle, borrowing (increfs now, will decref on destruction)
+    void push_back_borrow(handle h) {
+        Py_XINCREF(h.ptr());
+        m_ptrs.push_back(h.ptr());
     }

     /// Add a null pointer (for PY_VECTORCALL_ARGUMENTS_OFFSET slot)

The change compiles and local tests pass. However, I'd prefer to keep the current implementation for these reasons:

  1. No reduction in code size — The change is net +1 line. The reinterpret_steal<object>(...) wrapper and std::move() add verbosity that offsets the minor savings from dropping .ptr() at the borrow call sites.

  2. The current code is explicit and straightforwardref_small_vector is a purpose-built container for managing PyObject* ownership. Having it accept raw pointers makes the ownership transfer explicit at the call site. When you see push_back_steal(h.ptr()), it's immediately clear you're handing off a raw pointer to a container that will manage its lifetime.

  3. The abstraction doesn't simplify anything — We still need to deal with the underlying PyObject* for the vectorcall API. Wrapping the interface in object&&/handle just moves the .ptr() and .release() calls inside the method, without reducing overall complexity.

I think in this case the added abstraction layer doesn't provide tangible benefits. The current approach is clear, direct, and easy to follow.

}

void process(list &args_list, detail::args_proxy ap) {
// * unpacking
void process(list & /*names_list*/, detail::args_proxy ap) {
if (!ap) {
return;
}
for (auto a : ap) {
args_list.append(a);
m_args.push_back_borrow(a.ptr());
}
}

void process(list & /*args_list*/, arg_v a) {
// named argument
// NOLINTNEXTLINE(performance-unnecessary-value-param)
void process(list &names_list, arg_v a) {
assert(names_list);
if (!a.name) {
#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES)
nameless_argument_error();
#else
nameless_argument_error(a.type);
#endif
}
if (m_kwargs.contains(a.name)) {
auto name = str(a.name);
if (names_list.contains(name)) {
#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES)
multiple_values_error();
#else
Expand All @@ -2292,22 +2327,27 @@ class unpacking_collector {
throw cast_error_unable_to_convert_call_arg(a.name, a.type);
#endif
}
m_kwargs[a.name] = std::move(a.value);
names_list.append(std::move(name));
m_args.push_back_borrow(a.value.ptr());
Copy link
Collaborator

@Skylion007 Skylion007 Dec 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah both of these could just be a push_back(&&o) with a release ptr syntax and then you don't have to duplicate the logic and while the implementation is py::objects the input is still py::object. The one downside is the extra xdecref call on the nullptr stored in the py::object, but doubt that nullcheck will be a bottleneck?

Suggested change
m_args.push_back_borrow(a.value.ptr());
m_args.push_back_steal(reinterpret_borrow<object>(a.value));

}

void process(list & /*args_list*/, detail::kwargs_proxy kp) {
// ** unpacking
void process(list &names_list, detail::kwargs_proxy kp) {
if (!kp) {
return;
}
for (auto k : reinterpret_borrow<dict>(kp)) {
if (m_kwargs.contains(k.first)) {
assert(names_list);
for (auto &&k : reinterpret_borrow<dict>(kp)) {
auto name = str(k.first);
if (names_list.contains(name)) {
#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES)
multiple_values_error();
#else
multiple_values_error(str(k.first));
multiple_values_error(name);
#endif
}
m_kwargs[k.first] = k.second;
names_list.append(std::move(name));
m_args.push_back_borrow(k.second.ptr());
}
}

Expand All @@ -2333,39 +2373,20 @@ class unpacking_collector {
}

private:
tuple m_args;
dict m_kwargs;
ref_small_vector<arg_vector_small_size> m_args;
tuple m_names;
};

// [workaround(intel)] Separate function required here
// We need to put this into a separate function because the Intel compiler
// fails to compile enable_if_t<!all_of<is_positional<Args>...>::value>
// (tested with ICC 2021.1 Beta 20200827).
template <typename... Args>
constexpr bool args_are_all_positional() {
return all_of<is_positional<Args>...>::value;
}

/// Collect only positional arguments for a Python function call
template <return_value_policy policy,
typename... Args,
typename = enable_if_t<args_are_all_positional<Args...>()>>
simple_collector<policy> collect_arguments(Args &&...args) {
return simple_collector<policy>(std::forward<Args>(args)...);
}

/// Collect all arguments, including keywords and unpacking (only instantiated when needed)
template <return_value_policy policy,
typename... Args,
typename = enable_if_t<!args_are_all_positional<Args...>()>>
/// Collect all arguments, including keywords and unpacking
template <return_value_policy policy, typename... Args>
unpacking_collector<policy> collect_arguments(Args &&...args) {
// Following argument order rules for generalized unpacking according to PEP 448
static_assert(constexpr_last<is_positional, Args...>()
< constexpr_first<is_keyword_or_ds, Args...>()
&& constexpr_last<is_s_unpacking, Args...>()
< constexpr_first<is_ds_unpacking, Args...>(),
"Invalid function call: positional args must precede keywords and ** unpacking; "
"* unpacking must precede ** unpacking");
static_assert(
constexpr_last<is_positional, Args...>() < constexpr_first<is_keyword_or_ds, Args...>(),
"Invalid function call: positional args must precede keywords and */** unpacking;");
static_assert(constexpr_last<is_s_unpacking, Args...>()
< constexpr_first<is_ds_unpacking, Args...>(),
"Invalid function call: * unpacking must precede ** unpacking");
return unpacking_collector<policy>(std::forward<Args>(args)...);
}

Expand Down
Loading
Loading