diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index 310b77b342..f5a94da206 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -2078,8 +2078,7 @@ using is_pos_only = std::is_same, pos_only>; // forward declaration (definition in attr.h) struct function_record; -/// (Inline size chosen mostly arbitrarily; 6 should pad function_call out to two cache lines -/// (16 pointers) in size.) +/// Inline size chosen mostly arbitrarily. constexpr std::size_t arg_vector_small_size = 6; /// Internal data associated with a single function call @@ -2191,86 +2190,121 @@ class argument_loader { std::tuple...> argcasters; }; -/// Helper class which collects only positional arguments for a Python function call. -/// A fancier version below can collect any argument, but this one is optimal for simple calls. +// [workaround(intel)] Separate function required here +// We need to put this into a separate function because the Intel compiler +// fails to compile enable_if_t...>::value> +// (tested with ICC 2021.1 Beta 20200827). +template +constexpr bool args_has_keyword_or_ds() { + return any_of...>::value; +} + +/// Helper class which collects positional, keyword, * and ** arguments for a Python function call template -class simple_collector { +class unpacking_collector { public: template - explicit simple_collector(Ts &&...values) - : m_args(pybind11::make_tuple(std::forward(values)...)) {} - - const tuple &args() const & { return m_args; } - dict kwargs() const { return {}; } + explicit unpacking_collector(Ts &&...values) + : m_names(reinterpret_steal( + handle())) // initialize to null to avoid useless allocation of 0-length tuple + { + /* + Python can sometimes utilize an extra space before the arguments to prepend `self`. + This is important enough that there is a special flag for it: + PY_VECTORCALL_ARGUMENTS_OFFSET. + All we have to do is allocate an extra space at the beginning of this array, and set the + flag. Note that the extra space is not passed directly in to vectorcall. + */ + m_args.reserve(sizeof...(values) + 1); + m_args.push_back_null(); + + if (args_has_keyword_or_ds()) { + list names_list; + + // collect_arguments guarantees this can't be constructed with kwargs before the last + // positional so we don't need to worry about Ts... being in anything but normal python + // order. + using expander = int[]; + (void) expander{0, (process(names_list, std::forward(values)), 0)...}; + + m_names = reinterpret_steal(PyList_AsTuple(names_list.ptr())); + } else { + auto not_used + = reinterpret_steal(handle()); // initialize as null (to avoid an allocation) - tuple args() && { return std::move(m_args); } + using expander = int[]; + (void) expander{0, (process(not_used, std::forward(values)), 0)...}; + } + } /// Call a Python function and pass the collected arguments object call(PyObject *ptr) const { - PyObject *result = PyObject_CallObject(ptr, m_args.ptr()); + size_t nargs = m_args.size() - 1; // -1 for PY_VECTORCALL_ARGUMENTS_OFFSET (see ctor) + if (m_names) { + nargs -= m_names.size(); + } + PyObject *result = _PyObject_Vectorcall( + ptr, m_args.data() + 1, nargs | PY_VECTORCALL_ARGUMENTS_OFFSET, m_names.ptr()); if (!result) { throw error_already_set(); } return reinterpret_steal(result); } -private: - tuple m_args; -}; - -/// Helper class which collects positional, keyword, * and ** arguments for a Python function call -template -class unpacking_collector { -public: - template - explicit unpacking_collector(Ts &&...values) { - // Tuples aren't (easily) resizable so a list is needed for collection, - // but the actual function call strictly requires a tuple. - auto args_list = list(); - using expander = int[]; - (void) expander{0, (process(args_list, std::forward(values)), 0)...}; - - m_args = std::move(args_list); + tuple args() const { + size_t nargs = m_args.size() - 1; // -1 for PY_VECTORCALL_ARGUMENTS_OFFSET (see ctor) + if (m_names) { + nargs -= m_names.size(); + } + tuple val(nargs); + for (size_t i = 0; i < nargs; ++i) { + // +1 for PY_VECTORCALL_ARGUMENTS_OFFSET (see ctor) + val[i] = reinterpret_borrow(m_args[i + 1]); + } + return val; } - const tuple &args() const & { return m_args; } - const dict &kwargs() const & { return m_kwargs; } - - tuple args() && { return std::move(m_args); } - dict kwargs() && { return std::move(m_kwargs); } - - /// Call a Python function and pass the collected arguments - object call(PyObject *ptr) const { - PyObject *result = PyObject_Call(ptr, m_args.ptr(), m_kwargs.ptr()); - if (!result) { - throw error_already_set(); + dict kwargs() const { + dict val; + if (m_names) { + size_t offset = m_args.size() - m_names.size(); + for (size_t i = 0; i < m_names.size(); ++i, ++offset) { + val[m_names[i]] = reinterpret_borrow(m_args[offset]); + } } - return reinterpret_steal(result); + return val; } private: + // normal argument, possibly needing conversion template - void process(list &args_list, T &&x) { - auto o = reinterpret_steal( - detail::make_caster::cast(std::forward(x), policy, {})); - if (!o) { + void process(list & /*names_list*/, T &&x) { + handle h = detail::make_caster::cast(std::forward(x), policy, {}); + if (!h) { #if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) - throw cast_error_unable_to_convert_call_arg(std::to_string(args_list.size())); + throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size() - 1)); #else - throw cast_error_unable_to_convert_call_arg(std::to_string(args_list.size()), + throw cast_error_unable_to_convert_call_arg(std::to_string(m_args.size() - 1), type_id()); #endif } - args_list.append(std::move(o)); + m_args.push_back_steal(h.ptr()); // cast returns a new reference } - void process(list &args_list, detail::args_proxy ap) { + // * unpacking + void process(list & /*names_list*/, detail::args_proxy ap) { + if (!ap) { + return; + } for (auto a : ap) { - args_list.append(a); + m_args.push_back_borrow(a.ptr()); } } - void process(list & /*args_list*/, arg_v a) { + // named argument + // NOLINTNEXTLINE(performance-unnecessary-value-param) + void process(list &names_list, arg_v a) { + assert(names_list); if (!a.name) { #if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) nameless_argument_error(); @@ -2278,7 +2312,8 @@ class unpacking_collector { nameless_argument_error(a.type); #endif } - if (m_kwargs.contains(a.name)) { + auto name = str(a.name); + if (names_list.contains(name)) { #if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) multiple_values_error(); #else @@ -2292,22 +2327,27 @@ class unpacking_collector { throw cast_error_unable_to_convert_call_arg(a.name, a.type); #endif } - m_kwargs[a.name] = std::move(a.value); + names_list.append(std::move(name)); + m_args.push_back_borrow(a.value.ptr()); } - void process(list & /*args_list*/, detail::kwargs_proxy kp) { + // ** unpacking + void process(list &names_list, detail::kwargs_proxy kp) { if (!kp) { return; } - for (auto k : reinterpret_borrow(kp)) { - if (m_kwargs.contains(k.first)) { + assert(names_list); + for (auto &&k : reinterpret_borrow(kp)) { + auto name = str(k.first); + if (names_list.contains(name)) { #if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) multiple_values_error(); #else - multiple_values_error(str(k.first)); + multiple_values_error(name); #endif } - m_kwargs[k.first] = k.second; + names_list.append(std::move(name)); + m_args.push_back_borrow(k.second.ptr()); } } @@ -2333,39 +2373,20 @@ class unpacking_collector { } private: - tuple m_args; - dict m_kwargs; + ref_small_vector m_args; + tuple m_names; }; -// [workaround(intel)] Separate function required here -// We need to put this into a separate function because the Intel compiler -// fails to compile enable_if_t...>::value> -// (tested with ICC 2021.1 Beta 20200827). -template -constexpr bool args_are_all_positional() { - return all_of...>::value; -} - -/// Collect only positional arguments for a Python function call -template ()>> -simple_collector collect_arguments(Args &&...args) { - return simple_collector(std::forward(args)...); -} - -/// Collect all arguments, including keywords and unpacking (only instantiated when needed) -template ()>> +/// Collect all arguments, including keywords and unpacking +template unpacking_collector collect_arguments(Args &&...args) { // Following argument order rules for generalized unpacking according to PEP 448 - static_assert(constexpr_last() - < constexpr_first() - && constexpr_last() - < constexpr_first(), - "Invalid function call: positional args must precede keywords and ** unpacking; " - "* unpacking must precede ** unpacking"); + static_assert( + constexpr_last() < constexpr_first(), + "Invalid function call: positional args must precede keywords and */** unpacking;"); + static_assert(constexpr_last() + < constexpr_first(), + "Invalid function call: * unpacking must precede ** unpacking"); return unpacking_collector(std::forward(args)...); } diff --git a/include/pybind11/detail/argument_vector.h b/include/pybind11/detail/argument_vector.h index e15a3cfabc..6e2c2ec481 100644 --- a/include/pybind11/detail/argument_vector.h +++ b/include/pybind11/detail/argument_vector.h @@ -66,24 +66,23 @@ union inline_array_or_vector { inline_array iarray; heap_vector hvector; - static_assert(std::is_trivially_move_constructible::value, - "ArrayT must be trivially move constructible"); - static_assert(std::is_trivially_destructible::value, - "ArrayT must be trivially destructible"); - inline_array_or_vector() : iarray() {} + ~inline_array_or_vector() { - if (!is_inline()) { + if (is_inline()) { + iarray.~inline_array(); + } else { hvector.~heap_vector(); } } + // Disable copy ctor and assignment. inline_array_or_vector(const inline_array_or_vector &) = delete; inline_array_or_vector &operator=(const inline_array_or_vector &) = delete; inline_array_or_vector(inline_array_or_vector &&rhs) noexcept { if (rhs.is_inline()) { - std::memcpy(&iarray, &rhs.iarray, sizeof(iarray)); + new (&iarray) inline_array(std::move(rhs.iarray)); } else { new (&hvector) heap_vector(std::move(rhs.hvector)); } @@ -95,17 +94,16 @@ union inline_array_or_vector { return *this; } + if (is_inline()) { + iarray.~inline_array(); + } else { + hvector.~heap_vector(); + } + if (rhs.is_inline()) { - if (!is_inline()) { - hvector.~heap_vector(); - } - std::memcpy(&iarray, &rhs.iarray, sizeof(iarray)); + new (&iarray) inline_array(std::move(rhs.iarray)); } else { - if (is_inline()) { - new (&hvector) heap_vector(std::move(rhs.hvector)); - } else { - hvector = std::move(rhs.hvector); - } + new (&hvector) heap_vector(std::move(rhs.hvector)); } return *this; } @@ -126,18 +124,16 @@ union inline_array_or_vector { } }; -// small_vector-like container to avoid heap allocation for N or fewer -// arguments. -template -struct argument_vector { +template +struct small_vector { public: - argument_vector() = default; + small_vector() = default; // Disable copy ctor and assignment. - argument_vector(const argument_vector &) = delete; - argument_vector &operator=(const argument_vector &) = delete; - argument_vector(argument_vector &&) noexcept = default; - argument_vector &operator=(argument_vector &&) noexcept = default; + small_vector(const small_vector &) = delete; + small_vector &operator=(const small_vector &) = delete; + small_vector(small_vector &&) noexcept = default; + small_vector &operator=(small_vector &&) noexcept = default; std::size_t size() const { if (is_inline()) { @@ -146,7 +142,14 @@ struct argument_vector { return m_repr.hvector.vec.size(); } - handle &operator[](std::size_t idx) { + T const *data() const { + if (is_inline()) { + return m_repr.iarray.arr.data(); + } + return m_repr.hvector.vec.data(); + } + + T &operator[](std::size_t idx) { assert(idx < size()); if (is_inline()) { return m_repr.iarray.arr[idx]; @@ -154,7 +157,7 @@ struct argument_vector { return m_repr.hvector.vec[idx]; } - handle operator[](std::size_t idx) const { + T const &operator[](std::size_t idx) const { assert(idx < size()); if (is_inline()) { return m_repr.iarray.arr[idx]; @@ -162,28 +165,28 @@ struct argument_vector { return m_repr.hvector.vec[idx]; } - void push_back(handle x) { + void push_back(const T &x) { emplace_back(x); } + + void push_back(T &&x) { emplace_back(std::move(x)); } + + template + void emplace_back(Args &&...x) { if (is_inline()) { auto &ha = m_repr.iarray; - if (ha.size == N) { - move_to_heap_vector_with_reserved_size(N + 1); - push_back_slow_path(x); + if (ha.size == InlineSize) { + move_to_heap_vector_with_reserved_size(InlineSize + 1); + m_repr.hvector.vec.emplace_back(std::forward(x)...); } else { - ha.arr[ha.size++] = x; + ha.arr[ha.size++] = T(std::forward(x)...); } } else { - push_back_slow_path(x); + m_repr.hvector.vec.emplace_back(std::forward(x)...); } } - template - void emplace_back(Arg &&x) { - push_back(handle(x)); - } - void reserve(std::size_t sz) { if (is_inline()) { - if (sz > N) { + if (sz > InlineSize) { move_to_heap_vector_with_reserved_size(sz); } } else { @@ -192,7 +195,7 @@ struct argument_vector { } private: - using repr_type = inline_array_or_vector; + using repr_type = inline_array_or_vector; repr_type m_repr; PYBIND11_NOINLINE void move_to_heap_vector_with_reserved_size(std::size_t reserved_size) { @@ -201,32 +204,33 @@ struct argument_vector { using heap_vector = typename repr_type::heap_vector; heap_vector hv; hv.vec.reserve(reserved_size); - std::copy(ha.arr.begin(), ha.arr.begin() + ha.size, std::back_inserter(hv.vec)); + static_assert(std::is_nothrow_move_constructible::value, + "this conversion is not exception safe"); + static_assert(std::is_nothrow_move_constructible::value, + "this conversion is not exception safe"); + std::move(ha.arr.begin(), ha.arr.begin() + ha.size, std::back_inserter(hv.vec)); new (&m_repr.hvector) heap_vector(std::move(hv)); } - PYBIND11_NOINLINE void push_back_slow_path(handle x) { m_repr.hvector.vec.push_back(x); } - PYBIND11_NOINLINE void reserve_slow_path(std::size_t sz) { m_repr.hvector.vec.reserve(sz); } bool is_inline() const { return m_repr.is_inline(); } }; -// small_vector-like container to avoid heap allocation for N or fewer -// arguments. +// Container to avoid heap allocation for kRequestedInlineSize or fewer booleans. template -struct args_convert_vector { +struct small_vector { private: public: - args_convert_vector() = default; + small_vector() = default; // Disable copy ctor and assignment. - args_convert_vector(const args_convert_vector &) = delete; - args_convert_vector &operator=(const args_convert_vector &) = delete; - args_convert_vector(args_convert_vector &&) noexcept = default; - args_convert_vector &operator=(args_convert_vector &&) noexcept = default; + small_vector(const small_vector &) = delete; + small_vector &operator=(const small_vector &) = delete; + small_vector(small_vector &&) noexcept = default; + small_vector &operator=(small_vector &&) noexcept = default; - args_convert_vector(std::size_t count, bool value) { + small_vector(std::size_t count, bool value) { if (count > kInlineSize) { new (&m_repr.hvector) typename repr_type::heap_vector(count, value); } else { @@ -284,7 +288,24 @@ struct args_convert_vector { } } - void swap(args_convert_vector &rhs) noexcept { std::swap(m_repr, rhs.m_repr); } + void set(std::size_t idx, bool value = true) { + if (is_inline()) { + auto &ha = m_repr.iarray; + assert(ha.size < kInlineSize); + const auto wbi = word_and_bit_index(idx); + assert(wbi.word < kWords); + assert(wbi.bit < kBitsPerWord); + if (value) { + ha.arr[wbi.word] |= (static_cast(1) << wbi.bit); + } else { + ha.arr[wbi.word] &= ~(static_cast(1) << wbi.bit); + } + } else { + m_repr.hvector.vec[idx] = value; + } + } + + void swap(small_vector &rhs) noexcept { std::swap(m_repr, rhs.m_repr); } private: struct WordAndBitIndex { @@ -326,5 +347,71 @@ struct args_convert_vector { bool is_inline() const { return m_repr.is_inline(); } }; +// Container to avoid heap allocation for N or fewer arguments. +template +using argument_vector = small_vector; + +// Container to avoid heap allocation for N or fewer booleans. +template +using args_convert_vector = small_vector; + +/// A small_vector of PyObject* that holds references and releases them on destruction. +/// This provides explicit ownership semantics without relying on py::object's +/// destructor, and avoids the need for reinterpret_cast when passing to vectorcall. +template +class ref_small_vector { +public: + ref_small_vector() = default; + + ~ref_small_vector() { + for (std::size_t i = 0; i < m_ptrs.size(); ++i) { + Py_XDECREF(m_ptrs[i]); + } + } + + // Disable copy (prevent accidental double-decref) + ref_small_vector(const ref_small_vector &) = delete; + ref_small_vector &operator=(const ref_small_vector &) = delete; + + // Move is allowed + ref_small_vector(ref_small_vector &&other) noexcept : m_ptrs(std::move(other.m_ptrs)) { + // other.m_ptrs is now empty, so its destructor won't decref anything + } + + ref_small_vector &operator=(ref_small_vector &&other) noexcept { + if (this != &other) { + // Decref our current contents + for (std::size_t i = 0; i < m_ptrs.size(); ++i) { + Py_XDECREF(m_ptrs[i]); + } + m_ptrs = std::move(other.m_ptrs); + } + return *this; + } + + /// Add a pointer, taking ownership (no incref, will decref on destruction) + void push_back_steal(PyObject *p) { m_ptrs.push_back(p); } + + /// Add a pointer, borrowing (increfs now, will decref on destruction) + void push_back_borrow(PyObject *p) { + Py_XINCREF(p); + m_ptrs.push_back(p); + } + + /// Add a null pointer (for PY_VECTORCALL_ARGUMENTS_OFFSET slot) + void push_back_null() { m_ptrs.push_back(nullptr); } + + void reserve(std::size_t sz) { m_ptrs.reserve(sz); } + + std::size_t size() const { return m_ptrs.size(); } + + PyObject *operator[](std::size_t idx) const { return m_ptrs[idx]; } + + PyObject *const *data() const { return m_ptrs.data(); } + +private: + small_vector m_ptrs; +}; + PYBIND11_NAMESPACE_END(detail) PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index f93272628d..45e8e46f89 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -684,7 +684,7 @@ class cpp_function : public function { rec->def->ml_name = rec->name; rec->def->ml_meth = reinterpret_cast(reinterpret_cast(dispatcher)); - rec->def->ml_flags = METH_VARARGS | METH_KEYWORDS; + rec->def->ml_flags = METH_FASTCALL | METH_KEYWORDS; object py_func_rec = detail::function_record_PyObject_New(); (reinterpret_cast(py_func_rec.ptr()))->cpp_func_rec @@ -847,7 +847,8 @@ class cpp_function : public function { } /// Main dispatch logic for calls to functions bound using pybind11 - static PyObject *dispatcher(PyObject *self, PyObject *args_in, PyObject *kwargs_in) { + static PyObject * + dispatcher(PyObject *self, PyObject *const *args_in_arr, size_t nargsf, PyObject *kwnames_in) { using namespace detail; const function_record *overloads = function_record_ptr_from_PyObject(self); assert(overloads != nullptr); @@ -857,9 +858,9 @@ class cpp_function : public function { /* Need to know how many arguments + keyword arguments there are to pick the right overload */ - const auto n_args_in = static_cast(PyTuple_GET_SIZE(args_in)); + const auto n_args_in = static_cast(PyVectorcall_NARGS(nargsf)); - handle parent = n_args_in > 0 ? PyTuple_GET_ITEM(args_in, 0) : nullptr, + handle parent = n_args_in > 0 ? args_in_arr[0] : nullptr, result = PYBIND11_TRY_NEXT_OVERLOAD; auto self_value_and_holder = value_and_holder(); @@ -948,7 +949,7 @@ class cpp_function : public function { self_value_and_holder.type->dealloc(self_value_and_holder); } - call.init_self = PyTuple_GET_ITEM(args_in, 0); + call.init_self = args_in_arr[0]; call.args.emplace_back(reinterpret_cast(&self_value_and_holder)); call.args_convert.push_back(false); ++args_copied; @@ -959,17 +960,24 @@ class cpp_function : public function { for (; args_copied < args_to_copy; ++args_copied) { const argument_record *arg_rec = args_copied < func.args.size() ? &func.args[args_copied] : nullptr; - if (kwargs_in && arg_rec && arg_rec->name - && dict_getitemstring(kwargs_in, arg_rec->name)) { + + /* if the argument is listed in the call site's kwargs, but the argument is + also fulfilled positionally, then the call can't match this overload. for + example, the call site is: foo(0, key=1) but our overload is foo(key:int) then + this call can't be for us, because it would be invalid. + */ + if (kwnames_in && arg_rec && arg_rec->name + && keyword_index(kwnames_in, arg_rec->name) >= 0) { bad_arg = true; break; } - handle arg(PyTuple_GET_ITEM(args_in, args_copied)); + handle arg(args_in_arr[args_copied]); if (arg_rec && !arg_rec->none && arg.is_none()) { bad_arg = true; break; } + call.args.push_back(arg); call.args_convert.push_back(arg_rec ? arg_rec->convert : true); } @@ -981,20 +989,12 @@ class cpp_function : public function { // to copy the rest into a py::args argument. size_t positional_args_copied = args_copied; - // We'll need to copy this if we steal some kwargs for defaults - dict kwargs = reinterpret_borrow(kwargs_in); - // 1.5. Fill in any missing pos_only args from defaults if they exist if (args_copied < func.nargs_pos_only) { for (; args_copied < func.nargs_pos_only; ++args_copied) { const auto &arg_rec = func.args[args_copied]; - handle value; - if (arg_rec.value) { - value = arg_rec.value; - } - if (value) { - call.args.push_back(value); + call.args.push_back(arg_rec.value); call.args_convert.push_back(arg_rec.convert); } else { break; @@ -1007,46 +1007,42 @@ class cpp_function : public function { } // 2. Check kwargs and, failing that, defaults that may help complete the list + small_vector used_kwargs( + kwnames_in ? static_cast(PyTuple_GET_SIZE(kwnames_in)) : 0, false); + size_t used_kwargs_count = 0; if (args_copied < num_args) { - bool copied_kwargs = false; - for (; args_copied < num_args; ++args_copied) { const auto &arg_rec = func.args[args_copied]; handle value; - if (kwargs_in && arg_rec.name) { - value = dict_getitemstring(kwargs.ptr(), arg_rec.name); + if (kwnames_in && arg_rec.name) { + ssize_t i = keyword_index(kwnames_in, arg_rec.name); + if (i >= 0) { + value = args_in_arr[n_args_in + static_cast(i)]; + used_kwargs.set(static_cast(i), true); + used_kwargs_count++; + } } - if (value) { - // Consume a kwargs value - if (!copied_kwargs) { - kwargs = reinterpret_steal(PyDict_Copy(kwargs.ptr())); - copied_kwargs = true; - } - if (PyDict_DelItemString(kwargs.ptr(), arg_rec.name) == -1) { - throw error_already_set(); - } - } else if (arg_rec.value) { + if (!value) { value = arg_rec.value; + if (!value) { + break; + } } if (!arg_rec.none && value.is_none()) { break; } - if (value) { - // If we're at the py::args index then first insert a stub for it to be - // replaced later - if (func.has_args && call.args.size() == func.nargs_pos) { - call.args.push_back(none()); - } - - call.args.push_back(value); - call.args_convert.push_back(arg_rec.convert); - } else { - break; + // If we're at the py::args index then first insert a stub for it to be + // replaced later + if (func.has_args && call.args.size() == func.nargs_pos) { + call.args.push_back(none()); } + + call.args.push_back(value); + call.args_convert.push_back(arg_rec.convert); } if (args_copied < num_args) { @@ -1056,47 +1052,46 @@ class cpp_function : public function { } // 3. Check everything was consumed (unless we have a kwargs arg) - if (kwargs && !kwargs.empty() && !func.has_kwargs) { + if (!func.has_kwargs && used_kwargs_count < used_kwargs.size()) { continue; // Unconsumed kwargs, but no py::kwargs argument to accept them } // 4a. If we have a py::args argument, create a new tuple with leftovers if (func.has_args) { - tuple extra_args; - if (args_to_copy == 0) { - // We didn't copy out any position arguments from the args_in tuple, so we - // can reuse it directly without copying: - extra_args = reinterpret_borrow(args_in); - } else if (positional_args_copied >= n_args_in) { - extra_args = tuple(0); + if (positional_args_copied >= n_args_in) { + call.args_ref = tuple(0); } else { size_t args_size = n_args_in - positional_args_copied; - extra_args = tuple(args_size); + tuple extra_args(args_size); for (size_t i = 0; i < args_size; ++i) { - extra_args[i] = PyTuple_GET_ITEM(args_in, positional_args_copied + i); + extra_args[i] = args_in_arr[positional_args_copied + i]; } + call.args_ref = std::move(extra_args); } if (call.args.size() <= func.nargs_pos) { - call.args.push_back(extra_args); + call.args.push_back(call.args_ref); } else { - call.args[func.nargs_pos] = extra_args; + call.args[func.nargs_pos] = call.args_ref; } call.args_convert.push_back(false); - call.args_ref = std::move(extra_args); } // 4b. If we have a py::kwargs, pass on any remaining kwargs if (func.has_kwargs) { - if (!kwargs.ptr()) { - kwargs = dict(); // If we didn't get one, send an empty one + dict kwargs; + for (size_t i = 0; i < used_kwargs.size(); ++i) { + if (!used_kwargs[i]) { + kwargs[PyTuple_GET_ITEM(kwnames_in, i)] = args_in_arr[n_args_in + i]; + } } call.args.push_back(kwargs); call.args_convert.push_back(false); call.kwargs_ref = std::move(kwargs); } -// 5. Put everything in a vector. Not technically step 5, we've been building it -// in `call.args` all along. + // 5. Put everything in a vector. Not technically step 5, we've been building it + // in `call.args` all along. + #if defined(PYBIND11_DETAILED_ERROR_MESSAGES) if (call.args.size() != func.nargs || call.args_convert.size() != func.nargs) { pybind11_fail("Internal error: function call dispatcher inserted wrong number " @@ -1227,40 +1222,37 @@ class cpp_function : public function { msg += '\n'; } msg += "\nInvoked with: "; - auto args_ = reinterpret_borrow(args_in); bool some_args = false; - for (size_t ti = overloads->is_constructor ? 1 : 0; ti < args_.size(); ++ti) { + for (size_t ti = overloads->is_constructor ? 1 : 0; ti < n_args_in; ++ti) { if (!some_args) { some_args = true; } else { msg += ", "; } try { - msg += pybind11::repr(args_[ti]); + msg += pybind11::repr(args_in_arr[ti]); } catch (const error_already_set &) { msg += ""; } } - if (kwargs_in) { - auto kwargs = reinterpret_borrow(kwargs_in); - if (!kwargs.empty()) { - if (some_args) { - msg += "; "; + if (kwnames_in && PyTuple_GET_SIZE(kwnames_in) > 0) { + if (some_args) { + msg += "; "; + } + msg += "kwargs: "; + bool first = true; + for (size_t i = 0; i < static_cast(PyTuple_GET_SIZE(kwnames_in)); ++i) { + if (first) { + first = false; + } else { + msg += ", "; } - msg += "kwargs: "; - bool first = true; - for (const auto &kwarg : kwargs) { - if (first) { - first = false; - } else { - msg += ", "; - } - msg += pybind11::str("{}=").format(kwarg.first); - try { - msg += pybind11::repr(kwarg.second); - } catch (const error_already_set &) { - msg += ""; - } + msg += reinterpret_borrow(PyTuple_GET_ITEM(kwnames_in, i)); + msg += '='; + try { + msg += pybind11::repr(args_in_arr[n_args_in + i]); + } catch (const error_already_set &) { + msg += ""; } } } @@ -1295,6 +1287,28 @@ class cpp_function : public function { } return result.ptr(); } + + static ssize_t keyword_index(PyObject *haystack, char const *needle) { + /* kwargs is usually very small (<= 5 entries). The arg strings are typically interned. + * CPython itself implements the search this way, first comparing all pointers ... which is + * cheap and will work if the strings are interned. If it fails, then it falls back to a + * second lexicographic check. This is wildly expensive for huge argument lists, but those + * are incredibly rare so we optimize for the vastly common case of just a couple of args. + */ + auto n = PyTuple_GET_SIZE(haystack); + auto s = reinterpret_steal(PyUnicode_InternFromString(needle)); + for (ssize_t i = 0; i < n; ++i) { + if (PyTuple_GET_ITEM(haystack, i) == s.ptr()) { + return i; + } + } + for (ssize_t i = 0; i < n; ++i) { + if (PyUnicode_Compare(PyTuple_GET_ITEM(haystack, i), s.ptr()) == 0) { + return i; + } + } + return -1; + } }; PYBIND11_NAMESPACE_BEGIN(detail) diff --git a/tests/test_kwargs_and_defaults.py b/tests/test_kwargs_and_defaults.py index d41e505583..a7745d1ecb 100644 --- a/tests/test_kwargs_and_defaults.py +++ b/tests/test_kwargs_and_defaults.py @@ -458,7 +458,8 @@ def test_args_refcount(): assert refcount(myval) == expected exp3 = refcount(myval, myval, myval) - assert m.args_refcount(myval, myval, myval) == (exp3, exp3, exp3) + # if we have to create a new tuple internally, then it will hold an extra reference for each item in it. + assert m.args_refcount(myval, myval, myval) == (exp3 + 3, exp3 + 3, exp3 + 3) assert refcount(myval) == expected # This function takes the first arg as a `py::object` and the rest as a `py::args`. Unlike the