diff --git a/dpctl_ext/tensor/CMakeLists.txt b/dpctl_ext/tensor/CMakeLists.txt index 6f823a818ce..864e34ddaba 100644 --- a/dpctl_ext/tensor/CMakeLists.txt +++ b/dpctl_ext/tensor/CMakeLists.txt @@ -51,7 +51,7 @@ set(_tensor_impl_sources ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/copy_for_reshape.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/copy_for_roll.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/linear_sequences.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/linear_sequences.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/integer_advanced_indexing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/boolean_advanced_indexing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/eye_ctor.cpp @@ -93,7 +93,7 @@ endif() set(_no_fast_math_sources ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/copy_and_cast_usm_to_usm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/full_ctor.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/linear_sequences.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/linear_sequences.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/clip.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/where.cpp ) diff --git a/dpctl_ext/tensor/__init__.py b/dpctl_ext/tensor/__init__.py index 8cd8a1896b2..d46cf4a4a65 100644 --- a/dpctl_ext/tensor/__init__.py +++ b/dpctl_ext/tensor/__init__.py @@ -37,10 +37,21 @@ to_numpy, ) from dpctl_ext.tensor._ctors import ( + arange, + asarray, + empty, + empty_like, eye, full, + full_like, + linspace, + meshgrid, + ones, + ones_like, tril, triu, + zeros, + zeros_like, ) from dpctl_ext.tensor._indexing_functions import ( extract, @@ -52,8 +63,20 @@ take_along_axis, ) from dpctl_ext.tensor._manipulation_functions import ( + broadcast_arrays, + broadcast_to, + concat, + expand_dims, + flip, + moveaxis, + permute_dims, repeat, roll, + squeeze, + stack, + swapaxes, + tile, + unstack, ) from dpctl_ext.tensor._reshape import reshape @@ -61,19 +84,35 @@ from ._type_utils import can_cast, finfo, iinfo, isdtype, result_type __all__ = [ + "arange", + "asarray", "asnumpy", "astype", + "broadcast_arrays", + "broadcast_to", "can_cast", + "concat", "copy", "clip", + "empty", + "empty_like", "extract", + "expand_dims", "eye", "finfo", + "flip", "from_numpy", "full", + "full_like", "iinfo", "isdtype", + "linspace", + "meshgrid", + "moveaxis", + "permute_dims", "nonzero", + "ones", + "ones_like", "place", "put", "put_along_axis", @@ -81,10 +120,17 @@ "reshape", "result_type", "roll", + "squeeze", + "stack", + "swapaxes", "take", "take_along_axis", + "tile", "to_numpy", "tril", "triu", + "unstack", "where", + "zeros", + "zeros_like", ] diff --git a/dpctl_ext/tensor/_clip.py b/dpctl_ext/tensor/_clip.py index 50d3ecd568e..9fc42abc0d8 100644 --- a/dpctl_ext/tensor/_clip.py +++ b/dpctl_ext/tensor/_clip.py @@ -163,7 +163,7 @@ def _clip_none(x, val, out, order, _binary_fn): if ti._array_overlap(x, out): if not ti._same_logical_tensors(x, out): - out = dpt.empty_like(out) + out = dpt_ext.empty_like(out) if isinstance(val, dpt.usm_ndarray): if ( @@ -171,12 +171,12 @@ def _clip_none(x, val, out, order, _binary_fn): and not ti._same_logical_tensors(val, out) and val_dtype == res_dt ): - out = dpt.empty_like(out) + out = dpt_ext.empty_like(out) if isinstance(val, dpt.usm_ndarray): val_ary = val else: - val_ary = dpt.asarray(val, dtype=val_dtype, sycl_queue=exec_q) + val_ary = dpt_ext.asarray(val, dtype=val_dtype, sycl_queue=exec_q) if order == "A": order = ( @@ -197,7 +197,7 @@ def _clip_none(x, val, out, order, _binary_fn): x, val_ary, res_dt, res_shape, res_usm_type, exec_q ) else: - out = dpt.empty( + out = dpt_ext.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, @@ -205,9 +205,9 @@ def _clip_none(x, val, out, order, _binary_fn): order=order, ) if x_shape != res_shape: - x = dpt.broadcast_to(x, res_shape) + x = dpt_ext.broadcast_to(x, res_shape) if val_ary.shape != res_shape: - val_ary = dpt.broadcast_to(val_ary, res_shape) + val_ary = dpt_ext.broadcast_to(val_ary, res_shape) _manager = SequentialOrderManager[exec_q] dep_evs = _manager.submitted_events ht_binary_ev, binary_ev = _binary_fn( @@ -229,7 +229,7 @@ def _clip_none(x, val, out, order, _binary_fn): if order == "K": buf = _empty_like_orderK(val_ary, res_dt) else: - buf = dpt.empty_like(val_ary, dtype=res_dt, order=order) + buf = dpt_ext.empty_like(val_ary, dtype=res_dt, order=order) _manager = SequentialOrderManager[exec_q] dep_evs = _manager.submitted_events ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( @@ -242,7 +242,7 @@ def _clip_none(x, val, out, order, _binary_fn): x, buf, res_dt, res_shape, res_usm_type, exec_q ) else: - out = dpt.empty( + out = dpt_ext.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, @@ -251,8 +251,8 @@ def _clip_none(x, val, out, order, _binary_fn): ) if x_shape != res_shape: - x = dpt.broadcast_to(x, res_shape) - buf = dpt.broadcast_to(buf, res_shape) + x = dpt_ext.broadcast_to(x, res_shape) + buf = dpt_ext.broadcast_to(buf, res_shape) ht_binary_ev, binary_ev = _binary_fn( src1=x, src2=buf, @@ -353,14 +353,14 @@ def clip(x, /, min=None, max=None, out=None, order="K"): if ti._array_overlap(x, out): if not ti._same_logical_tensors(x, out): - out = dpt.empty_like(out) + out = dpt_ext.empty_like(out) else: return out else: if order == "K": out = _empty_like_orderK(x, x.dtype) else: - out = dpt.empty_like(x, order=order) + out = dpt_ext.empty_like(x, order=order) _manager = SequentialOrderManager[exec_q] dep_evs = _manager.submitted_events @@ -519,7 +519,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"): if ti._array_overlap(x, out): if not ti._same_logical_tensors(x, out): - out = dpt.empty_like(out) + out = dpt_ext.empty_like(out) if isinstance(min, dpt.usm_ndarray): if ( @@ -527,7 +527,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"): and not ti._same_logical_tensors(min, out) and buf1_dt is None ): - out = dpt.empty_like(out) + out = dpt_ext.empty_like(out) if isinstance(max, dpt.usm_ndarray): if ( @@ -535,16 +535,16 @@ def clip(x, /, min=None, max=None, out=None, order="K"): and not ti._same_logical_tensors(max, out) and buf2_dt is None ): - out = dpt.empty_like(out) + out = dpt_ext.empty_like(out) if isinstance(min, dpt.usm_ndarray): a_min = min else: - a_min = dpt.asarray(min, dtype=min_dtype, sycl_queue=exec_q) + a_min = dpt_ext.asarray(min, dtype=min_dtype, sycl_queue=exec_q) if isinstance(max, dpt.usm_ndarray): a_max = max else: - a_max = dpt.asarray(max, dtype=max_dtype, sycl_queue=exec_q) + a_max = dpt_ext.asarray(max, dtype=max_dtype, sycl_queue=exec_q) if order == "A": order = ( @@ -572,7 +572,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"): exec_q, ) else: - out = dpt.empty( + out = dpt_ext.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, @@ -580,11 +580,11 @@ def clip(x, /, min=None, max=None, out=None, order="K"): order=order, ) if x_shape != res_shape: - x = dpt.broadcast_to(x, res_shape) + x = dpt_ext.broadcast_to(x, res_shape) if a_min.shape != res_shape: - a_min = dpt.broadcast_to(a_min, res_shape) + a_min = dpt_ext.broadcast_to(a_min, res_shape) if a_max.shape != res_shape: - a_max = dpt.broadcast_to(a_max, res_shape) + a_max = dpt_ext.broadcast_to(a_max, res_shape) _manager = SequentialOrderManager[exec_q] dep_ev = _manager.submitted_events ht_binary_ev, binary_ev = ti._clip( @@ -612,7 +612,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"): if order == "K": buf2 = _empty_like_orderK(a_max, buf2_dt) else: - buf2 = dpt.empty_like(a_max, dtype=buf2_dt, order=order) + buf2 = dpt_ext.empty_like(a_max, dtype=buf2_dt, order=order) _manager = SequentialOrderManager[exec_q] dep_ev = _manager.submitted_events ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( @@ -631,7 +631,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"): exec_q, ) else: - out = dpt.empty( + out = dpt_ext.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, @@ -639,10 +639,10 @@ def clip(x, /, min=None, max=None, out=None, order="K"): order=order, ) - x = dpt.broadcast_to(x, res_shape) + x = dpt_ext.broadcast_to(x, res_shape) if a_min.shape != res_shape: - a_min = dpt.broadcast_to(a_min, res_shape) - buf2 = dpt.broadcast_to(buf2, res_shape) + a_min = dpt_ext.broadcast_to(a_min, res_shape) + buf2 = dpt_ext.broadcast_to(buf2, res_shape) ht_binary_ev, binary_ev = ti._clip( src=x, min=a_min, @@ -668,7 +668,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"): if order == "K": buf1 = _empty_like_orderK(a_min, buf1_dt) else: - buf1 = dpt.empty_like(a_min, dtype=buf1_dt, order=order) + buf1 = dpt_ext.empty_like(a_min, dtype=buf1_dt, order=order) _manager = SequentialOrderManager[exec_q] dep_ev = _manager.submitted_events ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( @@ -687,7 +687,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"): exec_q, ) else: - out = dpt.empty( + out = dpt_ext.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, @@ -695,10 +695,10 @@ def clip(x, /, min=None, max=None, out=None, order="K"): order=order, ) - x = dpt.broadcast_to(x, res_shape) - buf1 = dpt.broadcast_to(buf1, res_shape) + x = dpt_ext.broadcast_to(x, res_shape) + buf1 = dpt_ext.broadcast_to(buf1, res_shape) if a_max.shape != res_shape: - a_max = dpt.broadcast_to(a_max, res_shape) + a_max = dpt_ext.broadcast_to(a_max, res_shape) ht_binary_ev, binary_ev = ti._clip( src=x, min=buf1, @@ -736,7 +736,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"): if order == "K": buf1 = _empty_like_orderK(a_min, buf1_dt) else: - buf1 = dpt.empty_like(a_min, dtype=buf1_dt, order=order) + buf1 = dpt_ext.empty_like(a_min, dtype=buf1_dt, order=order) _manager = SequentialOrderManager[exec_q] dep_evs = _manager.submitted_events @@ -747,7 +747,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"): if order == "K": buf2 = _empty_like_orderK(a_max, buf2_dt) else: - buf2 = dpt.empty_like(a_max, dtype=buf2_dt, order=order) + buf2 = dpt_ext.empty_like(a_max, dtype=buf2_dt, order=order) ht_copy2_ev, copy2_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=a_max, dst=buf2, sycl_queue=exec_q, depends=dep_evs ) @@ -758,7 +758,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"): x, buf1, buf2, res_dt, res_shape, res_usm_type, exec_q ) else: - out = dpt.empty( + out = dpt_ext.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, @@ -766,9 +766,9 @@ def clip(x, /, min=None, max=None, out=None, order="K"): order=order, ) - x = dpt.broadcast_to(x, res_shape) - buf1 = dpt.broadcast_to(buf1, res_shape) - buf2 = dpt.broadcast_to(buf2, res_shape) + x = dpt_ext.broadcast_to(x, res_shape) + buf1 = dpt_ext.broadcast_to(buf1, res_shape) + buf2 = dpt_ext.broadcast_to(buf2, res_shape) ht_, clip_ev = ti._clip( src=x, min=buf1, diff --git a/dpctl_ext/tensor/_copy_utils.py b/dpctl_ext/tensor/_copy_utils.py index af72544a8b0..878dabc581d 100644 --- a/dpctl_ext/tensor/_copy_utils.py +++ b/dpctl_ext/tensor/_copy_utils.py @@ -91,7 +91,7 @@ def _copy_from_numpy(np_ary, usm_type="device", sycl_queue=None): ) else: Xusm_dtype = dt - Xusm = dpt.empty( + Xusm = dpt_ext.empty( Xnp.shape, dtype=Xusm_dtype, usm_type=usm_type, sycl_queue=sycl_queue ) _copy_from_numpy_into(Xusm, Xnp) @@ -159,7 +159,7 @@ def _extract_impl(ary, ary_mask, axis=0): elif isinstance(ary_mask, np.ndarray): dst_usm_type = ary.usm_type exec_q = ary.sycl_queue - ary_mask = dpt.asarray( + ary_mask = dpt_ext.asarray( ary_mask, usm_type=dst_usm_type, sycl_queue=exec_q ) else: @@ -176,7 +176,7 @@ def _extract_impl(ary, ary_mask, axis=0): ) mask_nelems = ary_mask.size cumsum_dt = dpt.int32 if mask_nelems < int32_t_max else dpt.int64 - cumsum = dpt.empty(mask_nelems, dtype=cumsum_dt, device=ary_mask.device) + cumsum = dpt_ext.empty(mask_nelems, dtype=cumsum_dt, device=ary_mask.device) exec_q = cumsum.sycl_queue _manager = dpctl.utils.SequentialOrderManager[exec_q] dep_evs = _manager.submitted_events @@ -184,7 +184,7 @@ def _extract_impl(ary, ary_mask, axis=0): ary_mask, cumsum, sycl_queue=exec_q, depends=dep_evs ) dst_shape = ary.shape[:pp] + (mask_count,) + ary.shape[pp + mask_nd :] - dst = dpt.empty( + dst = dpt_ext.empty( dst_shape, dtype=ary.dtype, usm_type=dst_usm_type, device=ary.device ) if dst.size == 0: @@ -247,7 +247,7 @@ def _nonzero_impl(ary): usm_type = ary.usm_type mask_nelems = ary.size cumsum_dt = dpt.int32 if mask_nelems < int32_t_max else dpt.int64 - cumsum = dpt.empty( + cumsum = dpt_ext.empty( mask_nelems, dtype=cumsum_dt, sycl_queue=exec_q, order="C" ) _manager = dpctl.utils.SequentialOrderManager[exec_q] @@ -256,7 +256,7 @@ def _nonzero_impl(ary): ary, cumsum, sycl_queue=exec_q, depends=dep_evs ) indexes_dt = ti.default_device_index_type(exec_q.sycl_device) - indexes = dpt.empty( + indexes = dpt_ext.empty( (ary.ndim, mask_count), dtype=indexes_dt, usm_type=usm_type, @@ -284,7 +284,7 @@ def _prepare_indices_arrays(inds, q, usm_type): lambda ind: ( ind if isinstance(ind, dpt.usm_ndarray) - else dpt.asarray(ind, usm_type=usm_type, sycl_queue=q) + else dpt_ext.asarray(ind, usm_type=usm_type, sycl_queue=q) ), inds, ) @@ -306,7 +306,7 @@ def _prepare_indices_arrays(inds, q, usm_type): ) # broadcast - inds = dpt.broadcast_arrays(*inds) + inds = dpt_ext.broadcast_arrays(*inds) return inds @@ -332,7 +332,7 @@ def _put_multi_index(ary, inds, p, vals, mode=0): if exec_q is not None: if not isinstance(vals, dpt.usm_ndarray): - vals = dpt.asarray( + vals = dpt_ext.asarray( vals, dtype=ary.dtype, usm_type=coerced_usm_type, @@ -368,7 +368,7 @@ def _put_multi_index(ary, inds, p, vals, mode=0): rhs = vals else: rhs = dpt_ext.astype(vals, ary.dtype) - rhs = dpt.broadcast_to(rhs, expected_vals_shape) + rhs = dpt_ext.broadcast_to(rhs, expected_vals_shape) _manager = dpctl.utils.SequentialOrderManager[exec_q] dep_ev = _manager.submitted_events hev, put_ev = ti._put( @@ -418,7 +418,7 @@ def _take_multi_index(ary, inds, p, mode=0): if 0 in ary_sh[p:p_end] and ind0.size != 0: raise IndexError("cannot take non-empty indices from an empty axis") res_shape = ary_sh[:p] + ind0.shape + ary_sh[p_end:] - res = dpt.empty( + res = dpt_ext.empty( res_shape, dtype=ary.dtype, usm_type=res_usm_type, sycl_queue=exec_q ) _manager = dpctl.utils.SequentialOrderManager[exec_q] @@ -681,7 +681,9 @@ def _make_empty_like_orderK(x, dt, usm_type, dev): inv_perm = sorted(range(x.ndim), key=lambda i: perm[i]) sh = x.shape sh_sorted = tuple(sh[i] for i in perm) - R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C") + R = dpt_ext.empty( + sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C" + ) if min(st) < 0: st_sorted = [st[i] for i in perm] sl = tuple( @@ -693,7 +695,7 @@ def _make_empty_like_orderK(x, dt, usm_type, dev): for i in range(x.ndim) ) R = R[sl] - return dpt.permute_dims(R, inv_perm) + return dpt_ext.permute_dims(R, inv_perm) def _empty_like_orderK(x, dt, usm_type=None, dev=None): @@ -712,11 +714,11 @@ def _empty_like_orderK(x, dt, usm_type=None, dev=None): dev = x.device fl = x.flags if fl["C"] or x.size <= 1: - return dpt.empty_like( + return dpt_ext.empty_like( x, dtype=dt, usm_type=usm_type, device=dev, order="C" ) elif fl["F"]: - return dpt.empty_like( + return dpt_ext.empty_like( x, dtype=dt, usm_type=usm_type, device=dev, order="F" ) return _make_empty_like_orderK(x, dt, usm_type, dev) @@ -734,11 +736,11 @@ def _from_numpy_empty_like_orderK(x, dt, usm_type, dev): raise TypeError(f"Expected numpy.ndarray, got {type(x)}") fl = x.flags if fl["C"] or x.size <= 1: - return dpt.empty( + return dpt_ext.empty( x.shape, dtype=dt, usm_type=usm_type, device=dev, order="C" ) elif fl["F"]: - return dpt.empty( + return dpt_ext.empty( x.shape, dtype=dt, usm_type=usm_type, device=dev, order="F" ) return _make_empty_like_orderK(x, dt, usm_type, dev) @@ -758,11 +760,11 @@ def _empty_like_pair_orderK(X1, X2, dt, res_shape, usm_type, dev): fl1 = X1.flags fl2 = X2.flags if fl1["C"] or fl2["C"]: - return dpt.empty( + return dpt_ext.empty( res_shape, dtype=dt, usm_type=usm_type, device=dev, order="C" ) if fl1["F"] and fl2["F"]: - return dpt.empty( + return dpt_ext.empty( res_shape, dtype=dt, usm_type=usm_type, device=dev, order="F" ) st1 = list(X1.strides) @@ -785,7 +787,9 @@ def _empty_like_pair_orderK(X1, X2, dt, res_shape, usm_type, dev): st2_sorted = [st2[i] for i in perm] sh = res_shape sh_sorted = tuple(sh[i] for i in perm) - R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C") + R = dpt_ext.empty( + sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C" + ) if max(min(st1_sorted), min(st2_sorted)) < 0: sl = tuple( ( @@ -796,7 +800,7 @@ def _empty_like_pair_orderK(X1, X2, dt, res_shape, usm_type, dev): for i in range(nd1) ) R = R[sl] - return dpt.permute_dims(R, inv_perm) + return dpt_ext.permute_dims(R, inv_perm) def _empty_like_triple_orderK(X1, X2, X3, dt, res_shape, usm_type, dev): @@ -823,11 +827,11 @@ def _empty_like_triple_orderK(X1, X2, X3, dt, res_shape, usm_type, dev): fl2 = X2.flags fl3 = X3.flags if fl1["C"] or fl2["C"] or fl3["C"]: - return dpt.empty( + return dpt_ext.empty( res_shape, dtype=dt, usm_type=usm_type, device=dev, order="C" ) if fl1["F"] and fl2["F"] and fl3["F"]: - return dpt.empty( + return dpt_ext.empty( res_shape, dtype=dt, usm_type=usm_type, device=dev, order="F" ) st1 = list(X1.strides) @@ -855,7 +859,9 @@ def _empty_like_triple_orderK(X1, X2, X3, dt, res_shape, usm_type, dev): st3_sorted = [st3[i] for i in perm] sh = res_shape sh_sorted = tuple(sh[i] for i in perm) - R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C") + R = dpt_ext.empty( + sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C" + ) if max(min(st1_sorted), min(st2_sorted), min(st3_sorted)) < 0: sl = tuple( ( @@ -870,7 +876,7 @@ def _empty_like_triple_orderK(X1, X2, X3, dt, res_shape, usm_type, dev): for i in range(nd1) ) R = R[sl] - return dpt.permute_dims(R, inv_perm) + return dpt_ext.permute_dims(R, inv_perm) def copy(usm_ary, /, *, order="K"): diff --git a/dpctl_ext/tensor/_ctors.py b/dpctl_ext/tensor/_ctors.py index 5a9e07c7334..532802c0c51 100644 --- a/dpctl_ext/tensor/_ctors.py +++ b/dpctl_ext/tensor/_ctors.py @@ -30,16 +30,290 @@ from numbers import Number import dpctl +import dpctl.memory as dpm import dpctl.tensor as dpt import dpctl.utils import numpy as np from dpctl.tensor._data_types import _get_dtype from dpctl.tensor._device import normalize_queue_device +from dpctl.tensor._usmarray import _is_object_with_buffer_protocol # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor import dpctl_ext.tensor as dpt_ext import dpctl_ext.tensor._tensor_impl as ti +from dpctl_ext.tensor._copy_utils import ( + _empty_like_orderK, + _from_numpy_empty_like_orderK, +) + +__doc__ = "Implementation of creation functions in :module:`dpctl.tensor`" + +_empty_tuple = () +_host_set = frozenset([None]) + + +def _array_info_dispatch(obj): + if isinstance(obj, dpt.usm_ndarray): + return obj.shape, obj.dtype, frozenset([obj.sycl_queue]) + if isinstance(obj, np.ndarray): + return obj.shape, obj.dtype, _host_set + if isinstance(obj, range): + return (len(obj),), int, _host_set + if isinstance(obj, bool): + return _empty_tuple, bool, _host_set + if isinstance(obj, float): + return _empty_tuple, float, _host_set + if isinstance(obj, int): + return _empty_tuple, int, _host_set + if isinstance(obj, complex): + return _empty_tuple, complex, _host_set + if isinstance( + obj, + ( + list, + tuple, + ), + ): + return _array_info_sequence(obj) + if _is_object_with_buffer_protocol(obj): + np_obj = np.array(obj) + return np_obj.shape, np_obj.dtype, _host_set + if hasattr(obj, "__usm_ndarray__"): + usm_ar = obj.__usm_ndarray__ + if isinstance(usm_ar, dpt.usm_ndarray): + return usm_ar.shape, usm_ar.dtype, frozenset([usm_ar.sycl_queue]) + if hasattr(obj, "__sycl_usm_array_interface__"): + usm_ar = _usm_ndarray_from_suai(obj) + return usm_ar.shape, usm_ar.dtype, frozenset([usm_ar.sycl_queue]) + + +def _array_info_sequence(li): + if not isinstance(li, (list, tuple, range)): + raise TypeError(f"Expected list, tuple, or range, got {type(li)}") + n = len(li) + dim = None + dt = None + device = frozenset() + for el in li: + el_dim, el_dt, el_dev = _array_info_dispatch(el) + if dim is None: + dim = el_dim + dt = np.promote_types(el_dt, el_dt) + device = device.union(el_dev) + elif el_dim == dim: + dt = np.promote_types(dt, el_dt) + device = device.union(el_dev) + else: + raise ValueError(f"Inconsistent dimensions, {dim} and {el_dim}") + if dim is None: + dim = () + dt = float + device = _host_set + return (n,) + dim, dt, device + + +def _asarray_from_numpy_ndarray( + ary, dtype=None, usm_type=None, sycl_queue=None, order="K" +): + if not isinstance(ary, np.ndarray): + raise TypeError(f"Expected numpy.ndarray, got {type(ary)}") + if usm_type is None: + usm_type = "device" + copy_q = normalize_queue_device(sycl_queue=None, device=sycl_queue) + if ary.dtype.char not in "?bBhHiIlLqQefdFD": + raise TypeError( + f"Numpy array of data type {ary.dtype} is not supported. " + "Please convert the input to an array with numeric data type." + ) + if dtype is None: + # deduce device-representable output data type + dtype = _map_to_device_dtype(ary.dtype, copy_q) + _ensure_native_dtype_device_support(dtype, copy_q.sycl_device) + f_contig = ary.flags["F"] + c_contig = ary.flags["C"] + fc_contig = f_contig or c_contig + if order == "A": + order = "F" if f_contig and not c_contig else "C" + if order == "K" and fc_contig: + order = "C" if c_contig else "F" + if order == "K": + # new USM allocation + res = _from_numpy_empty_like_orderK(ary, dtype, usm_type, copy_q) + else: + res = dpt.usm_ndarray( + ary.shape, + dtype=dtype, + buffer=usm_type, + order=order, + buffer_ctor_kwargs={"queue": copy_q}, + ) + res[...] = ary + return res + + +def _asarray_from_seq( + seq_obj, + seq_shape, + seq_dt, + alloc_q, + exec_q, + dtype=None, + usm_type=None, + order="C", +): + """`seq_obj` is a sequence""" + if usm_type is None: + usm_types_in_seq = [] + _usm_types_walker(seq_obj, usm_types_in_seq) + usm_type = dpctl.utils.get_coerced_usm_type(usm_types_in_seq) + dpctl.utils.validate_usm_type(usm_type) + if dtype is None: + dtype = _map_to_device_dtype(seq_dt, alloc_q) + else: + _mapped_dt = _map_to_device_dtype(dtype, alloc_q) + if _mapped_dt != dtype: + raise ValueError( + f"Device {alloc_q.sycl_device} " + f"does not support {dtype} natively." + ) + dtype = _mapped_dt + if order in "KA": + order = "C" + if isinstance(exec_q, dpctl.SyclQueue): + res = dpt_ext.empty( + seq_shape, + dtype=dtype, + usm_type=usm_type, + sycl_queue=alloc_q, + order=order, + ) + _manager = dpctl.utils.SequentialOrderManager[exec_q] + _device_copy_walker(seq_obj, res, _manager) + return res + else: + res = dpt_ext.empty( + seq_shape, + dtype=dtype, + usm_type=usm_type, + sycl_queue=alloc_q, + order=order, + ) + _copy_through_host_walker(seq_obj, res) + return res + + +def _asarray_from_seq_single_device( + obj, + seq_shape, + seq_dt, + seq_dev, + dtype=None, + usm_type=None, + sycl_queue=None, + order="C", +): + if sycl_queue is None: + exec_q = seq_dev + alloc_q = seq_dev + else: + exec_q = dpctl.utils.get_execution_queue( + ( + sycl_queue, + seq_dev, + ) + ) + alloc_q = sycl_queue + return _asarray_from_seq( + obj, + seq_shape, + seq_dt, + alloc_q, + exec_q, + dtype=dtype, + usm_type=usm_type, + order=order, + ) + + +def _asarray_from_usm_ndarray( + usm_ndary, + dtype=None, + copy=None, + usm_type=None, + sycl_queue=None, + order="K", +): + if not isinstance(usm_ndary, dpt.usm_ndarray): + raise TypeError( + f"Expected dpctl.tensor.usm_ndarray, got {type(usm_ndary)}" + ) + if usm_type is None: + usm_type = usm_ndary.usm_type + if sycl_queue is not None: + exec_q = dpctl.utils.get_execution_queue( + [usm_ndary.sycl_queue, sycl_queue] + ) + copy_q = normalize_queue_device(sycl_queue=sycl_queue, device=exec_q) + else: + copy_q = usm_ndary.sycl_queue + if dtype is None: + dtype = _map_to_device_dtype(usm_ndary.dtype, copy_q) + # Conditions for zero copy: + can_zero_copy = copy is not True + # dtype is unchanged + can_zero_copy = can_zero_copy and dtype == usm_ndary.dtype + # USM allocation type is unchanged + can_zero_copy = can_zero_copy and usm_type == usm_ndary.usm_type + # sycl_queue is unchanged + can_zero_copy = can_zero_copy and copy_q is usm_ndary.sycl_queue + # order is unchanged + c_contig = usm_ndary.flags.c_contiguous + f_contig = usm_ndary.flags.f_contiguous + fc_contig = usm_ndary.flags.forc + if can_zero_copy: + if order == "C" and c_contig: + pass + elif order == "F" and f_contig: + pass + elif order == "A" and fc_contig: + pass + elif order == "K": + pass + else: + can_zero_copy = False + if copy is False and can_zero_copy is False: + raise ValueError("asarray(..., copy=False) is not possible") + if can_zero_copy: + return usm_ndary + if order == "A": + order = "F" if f_contig and not c_contig else "C" + if order == "K" and fc_contig: + order = "C" if c_contig else "F" + if order == "K": + _ensure_native_dtype_device_support(dtype, copy_q.sycl_device) + res = _empty_like_orderK(usm_ndary, dtype, usm_type, copy_q) + else: + _ensure_native_dtype_device_support(dtype, copy_q.sycl_device) + res = dpt.usm_ndarray( + usm_ndary.shape, + dtype=dtype, + buffer=usm_type, + order=order, + buffer_ctor_kwargs={"queue": copy_q}, + ) + eq = dpctl.utils.get_execution_queue([usm_ndary.sycl_queue, copy_q]) + if eq is not None: + _manager = dpctl.utils.SequentialOrderManager[eq] + dep_evs = _manager.submitted_events + hev, cpy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=usm_ndary, dst=res, sycl_queue=eq, depends=dep_evs + ) + _manager.add_event_pair(hev, cpy_ev) + else: + tmp = dpt_ext.asnumpy(usm_ndary) + res[...] = tmp + return res def _cast_fill_val(fill_val, dt): @@ -58,6 +332,99 @@ def _cast_fill_val(fill_val, dt): return fill_val +def _coerce_and_infer_dt(*args, dt, sycl_queue, err_msg, allow_bool=False): + """Deduce arange type from sequence spec""" + nd, seq_dt, d = _array_info_sequence(args) + if d != _host_set or nd != (len(args),): + raise ValueError(err_msg) + dt = _get_dtype(dt, sycl_queue, ref_type=seq_dt) + if np.issubdtype(dt, np.integer): + return tuple(int(v) for v in args), dt + if np.issubdtype(dt, np.floating): + return tuple(float(v) for v in args), dt + if np.issubdtype(dt, np.complexfloating): + return tuple(complex(v) for v in args), dt + if allow_bool and dt.char == "?": + return tuple(bool(v) for v in args), dt + raise ValueError(f"Data type {dt} is not supported") + + +def _copy_through_host_walker(seq_o, usm_res): + if isinstance(seq_o, dpt.usm_ndarray): + if ( + dpctl.utils.get_execution_queue( + ( + usm_res.sycl_queue, + seq_o.sycl_queue, + ) + ) + is None + ): + usm_res[...] = dpt.asnumpy(seq_o).copy() + return + else: + usm_res[...] = seq_o + if hasattr(seq_o, "__usm_ndarray__"): + usm_arr = seq_o.__usm_ndarray__ + if isinstance(usm_arr, dpt.usm_ndarray): + _copy_through_host_walker(usm_arr, usm_res) + return + if hasattr(seq_o, "__sycl_usm_array_interface__"): + usm_ar = _usm_ndarray_from_suai(seq_o) + if ( + dpctl.utils.get_execution_queue( + ( + usm_res.sycl_queue, + usm_ar.sycl_queue, + ) + ) + is None + ): + usm_res[...] = dpt_ext.asnumpy(usm_ar).copy() + else: + usm_res[...] = usm_ar + return + if _is_object_with_buffer_protocol(seq_o): + np_ar = np.asarray(seq_o) + usm_res[...] = np_ar + return + if isinstance(seq_o, (list, tuple)): + for i, el in enumerate(seq_o): + _copy_through_host_walker(el, usm_res[i]) + return + usm_res[...] = np.asarray(seq_o) + + +def _device_copy_walker(seq_o, res, _manager): + if isinstance(seq_o, dpt.usm_ndarray): + exec_q = res.sycl_queue + deps = _manager.submitted_events + ht_ev, cpy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=seq_o, dst=res, sycl_queue=exec_q, depends=deps + ) + _manager.add_event_pair(ht_ev, cpy_ev) + return + if hasattr(seq_o, "__usm_ndarray__"): + usm_arr = seq_o.__usm_ndarray__ + if isinstance(usm_arr, dpt.usm_ndarray): + _device_copy_walker(usm_arr, res, _manager) + return + if hasattr(seq_o, "__sycl_usm_array_interface__"): + usm_ar = _usm_ndarray_from_suai(seq_o) + exec_q = res.sycl_queue + deps = _manager.submitted_events + ht_ev, cpy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=usm_ar, dst=res, sycl_queue=exec_q, depends=deps + ) + _manager.add_event_pair(ht_ev, cpy_ev) + return + if isinstance(seq_o, (list, tuple)): + for i, el in enumerate(seq_o): + _device_copy_walker(el, res[i], _manager) + return + raise TypeError + + def _ensure_native_dtype_device_support(dtype, dev) -> None: """Check that dtype is natively supported by device. @@ -90,54 +457,1091 @@ def _ensure_native_dtype_device_support(dtype, dev) -> None: ) -def _to_scalar(obj, sc_ty): - """A way to convert object to NumPy scalar type. - Raises OverflowError if obj can not be represented - using the requested scalar type. - """ - zd_arr = np.asarray(obj, dtype=sc_ty) - return zd_arr[()] +def _get_arange_length(start, stop, step): + """Compute length of arange sequence""" + span = stop - start + if hasattr(step, "__float__") and hasattr(span, "__float__"): + return _round_for_arange(span / step) + tmp = span / step + if hasattr(tmp, "__complex__"): + tmp = complex(tmp) + tmp = tmp.real + else: + tmp = float(tmp) + return _round_for_arange(tmp) + + +def _map_to_device_dtype(dt, q): + dtc = dt.char + if dtc == "?" or np.issubdtype(dt, np.integer): + return dt + d = q.sycl_device + if np.issubdtype(dt, np.floating): + if dtc == "f": + return dt + if dtc == "d" and d.has_aspect_fp64: + return dt + if dtc == "e" and d.has_aspect_fp16: + return dt + return dpt.dtype("f4") + if np.issubdtype(dt, np.complexfloating): + if dtc == "F": + return dt + if dtc == "D" and d.has_aspect_fp64: + return dt + return dpt.dtype("c8") + raise RuntimeError(f"Unrecognized data type '{dt}' encountered.") + + +def _normalize_order(order, arr): + """ + Utility function for processing the `order` keyword of array-like + constructors, which support `"K"` and `"A"` orders. + """ + arr_flags = arr.flags + f_contig = arr_flags["F"] + c_contig = arr_flags["C"] + if order == "A": + order = "F" if f_contig and not c_contig else "C" + if order == "K" and (f_contig or c_contig): + order = "C" if c_contig else "F" + return order + + +def _round_for_arange(tmp): + k = int(tmp) + if k >= 0 and float(k) < tmp: + tmp = tmp + 1 + return tmp + + +def _to_scalar(obj, sc_ty): + """A way to convert object to NumPy scalar type. + Raises OverflowError if obj can not be represented + using the requested scalar type. + """ + zd_arr = np.asarray(obj, dtype=sc_ty) + return zd_arr[()] + + +def _usm_ndarray_from_suai(obj): + sua_iface = obj.__sycl_usm_array_interface__ + membuf = dpm.as_usm_memory(obj) + ary = dpt.usm_ndarray( + sua_iface["shape"], + dtype=sua_iface["typestr"], + buffer=membuf, + strides=sua_iface.get("strides", None), + ) + _data_field = sua_iface["data"] + if isinstance(_data_field, tuple) and len(_data_field) > 1: + ro_field = _data_field[1] + else: + ro_field = False + if ro_field: + ary.flags["W"] = False + return ary + + +def _usm_types_walker(o, usm_types_list): + if isinstance(o, dpt.usm_ndarray): + usm_types_list.append(o.usm_type) + return + if hasattr(o, "__usm_ndarray__"): + usm_arr = o.__usm_ndarray__ + if isinstance(usm_arr, dpt.usm_ndarray): + usm_types_list.append(usm_arr.usm_type) + return + if hasattr(o, "__sycl_usm_array_interface__"): + usm_ar = _usm_ndarray_from_suai(o) + usm_types_list.append(usm_ar.usm_type) + return + if _is_object_with_buffer_protocol(o): + return + if isinstance(o, (int, bool, float, complex)): + return + if isinstance(o, (list, tuple, range)): + for el in o: + _usm_types_walker(el, usm_types_list) + return + raise TypeError + + +def arange( + start, + /, + stop=None, + step=1, + *, + dtype=None, + device=None, + usm_type="device", + sycl_queue=None, +): + """ + Returns evenly spaced values within the half-open interval [start, stop) + as a one-dimensional array. + + Args: + start: + Starting point of the interval + stop: + Ending point of the interval. Default: ``None`` + step: Increment of the returned sequence. Default: ``1`` + dtype: Output array data type. Default: ``None`` + device (optional): array API concept of device where the output array + is created. ``device`` can be ``None``, a oneAPI filter selector + string, an instance of :class:`dpctl.SyclDevice` corresponding to + a non-partitioned SYCL device, an instance of + :class:`dpctl.SyclQueue`, or a :class:`dpctl.tensor.Device` object + returned by :attr:`dpctl.tensor.usm_ndarray.device`. + Default: ``None`` + usm_type (``"device"``, ``"shared"``, ``"host"``, optional): + The type of SYCL USM allocation for the output array. + Default: ``"device"`` + sycl_queue (:class:`dpctl.SyclQueue`, optional): + The SYCL queue to use + for output array allocation and copying. ``sycl_queue`` and + ``device`` are complementary arguments, i.e. use one or another. + If both are specified, a :exc:`TypeError` is raised unless both + imply the same underlying SYCL queue to be used. If both are + ``None``, a cached queue targeting default-selected device is + used for allocation and population. Default: ``None`` + + Returns: + usm_ndarray: + Array populated with evenly spaced values. + """ + if stop is None: + stop = start + start = 0 + if step is None: + step = 1 + dpctl.utils.validate_usm_type(usm_type, allow_none=False) + sycl_queue = normalize_queue_device(sycl_queue=sycl_queue, device=device) + is_bool = False + if dtype: + is_bool = (dtype is bool) or (dpt.dtype(dtype) == dpt.bool) + _, dt = _coerce_and_infer_dt( + start, + stop, + step, + dt=dpt.int8 if is_bool else dtype, + sycl_queue=sycl_queue, + err_msg="start, stop, and step must be Python scalars", + allow_bool=False, + ) + try: + tmp = _get_arange_length(start, stop, step) + sh = max(int(tmp), 0) + except TypeError: + sh = 0 + if is_bool and sh > 2: + raise ValueError("no fill-function for boolean data type") + res = dpt.usm_ndarray( + (sh,), + dtype=dt, + buffer=usm_type, + order="C", + buffer_ctor_kwargs={"queue": sycl_queue}, + ) + sc_ty = dt.type + _first = _to_scalar(start, sc_ty) + if sh > 1: + _second = _to_scalar(start + step, sc_ty) + if dt in [dpt.uint8, dpt.uint16, dpt.uint32, dpt.uint64]: + int64_ty = dpt.int64.type + _step = int64_ty(_second) - int64_ty(_first) + else: + _step = _second - _first + _step = sc_ty(_step) + else: + _step = sc_ty(1) + _start = _first + _manager = dpctl.utils.SequentialOrderManager[sycl_queue] + # populating newly allocated array, no task dependencies + hev, lin_ev = ti._linspace_step(_start, _step, res, sycl_queue) + _manager.add_event_pair(hev, lin_ev) + if is_bool: + res_out = dpt.usm_ndarray( + (sh,), + dtype=dpt.bool, + buffer=usm_type, + order="C", + buffer_ctor_kwargs={"queue": sycl_queue}, + ) + hev_cpy, cpy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=res, dst=res_out, sycl_queue=sycl_queue, depends=[lin_ev] + ) + _manager.add_event_pair(hev_cpy, cpy_ev) + return res_out + return res + + +def asarray( + obj, + /, + *, + dtype=None, + device=None, + copy=None, + usm_type=None, + sycl_queue=None, + order="K", +): + """ + Converts input object to :class:`dpctl.tensor.usm_ndarray`. + + Args: + obj: Python object to convert. Can be an instance of + :class:`dpctl.tensor.usm_ndarray`, + an object representing SYCL USM allocation and implementing + ``__sycl_usm_array_interface__`` protocol, an instance + of :class:`numpy.ndarray`, an object supporting Python buffer + protocol, a Python scalar, or a (possibly nested) sequence of + Python scalars. + dtype (data type, optional): + output array data type. If ``dtype`` is + ``None``, the output array data type is inferred from data types in + ``obj``. Default: ``None`` + copy (`bool`, optional): + boolean indicating whether or not to copy the + input. If ``True``, always creates a copy. If ``False``, the + need to copy raises :exc:`ValueError`. If ``None``, tries to reuse + existing memory allocations if possible, but allows to perform + a copy otherwise. Default: ``None`` + order (``"C"``, ``"F"``, ``"A"``, ``"K"``, optional): + memory layout of the output array. Default: ``"K"`` + device (optional): array API concept of device where the output array + is created. ``device`` can be ``None``, a oneAPI filter selector + string, an instance of :class:`dpctl.SyclDevice` corresponding to + a non-partitioned SYCL device, an instance of + :class:`dpctl.SyclQueue`, or a :class:`dpctl.tensor.Device` object + returned by :attr:`dpctl.tensor.usm_ndarray.device`. + Default: ``None`` + usm_type (``"device"``, ``"shared"``, ``"host"``, optional): + The type of SYCL USM allocation for the output array. + Default: ``"device"`` + sycl_queue (:class:`dpctl.SyclQueue`, optional): + The SYCL queue to use + for output array allocation and copying. ``sycl_queue`` and + ``device`` are complementary arguments, i.e. use one or another. + If both are specified, a :exc:`TypeError` is raised unless both + imply the same underlying SYCL queue to be used. If both are + ``None``, a cached queue targeting default-selected device is + used for allocation and population. Default: ``None`` + + Returns: + usm_ndarray: + Array created from input object. + """ + # 1. Check that copy is a valid keyword + if copy not in [None, True, False]: + raise TypeError( + "Recognized copy keyword values should be True, False, or None" + ) + # 2. Check that dtype is None, or a valid dtype + if dtype is not None: + dtype = dpt.dtype(dtype) + # 3. Validate order + if not isinstance(order, str): + raise TypeError( + f"Expected order keyword to be of type str, got {type(order)}" + ) + if len(order) == 0 or order[0] not in "KkAaCcFf": + raise ValueError( + "Unrecognized order keyword value, expecting 'K', 'A', 'F', or 'C'." + ) + order = order[0].upper() + # 4. Check that usm_type is None, or a valid value + dpctl.utils.validate_usm_type(usm_type, allow_none=True) + # 5. Normalize device/sycl_queue [keep it None if was None] + if device is not None or sycl_queue is not None: + sycl_queue = normalize_queue_device( + sycl_queue=sycl_queue, device=device + ) + + # handle instance(obj, usm_ndarray) + if isinstance(obj, dpt.usm_ndarray): + return _asarray_from_usm_ndarray( + obj, + dtype=dtype, + copy=copy, + usm_type=usm_type, + sycl_queue=sycl_queue, + order=order, + ) + if hasattr(obj, "__usm_ndarray__"): + usm_arr = obj.__usm_ndarray__ + if isinstance(usm_arr, dpt.usm_ndarray): + return _asarray_from_usm_ndarray( + usm_arr, + dtype=dtype, + copy=copy, + usm_type=usm_type, + sycl_queue=sycl_queue, + order=order, + ) + if hasattr(obj, "__sycl_usm_array_interface__"): + ary = _usm_ndarray_from_suai(obj) + return _asarray_from_usm_ndarray( + ary, + dtype=dtype, + copy=copy, + usm_type=usm_type, + sycl_queue=sycl_queue, + order=order, + ) + if isinstance(obj, np.ndarray): + if copy is False: + raise ValueError( + "Converting numpy.ndarray to usm_ndarray requires a copy" + ) + return _asarray_from_numpy_ndarray( + obj, + dtype=dtype, + usm_type=usm_type, + sycl_queue=sycl_queue, + order=order, + ) + if _is_object_with_buffer_protocol(obj): + if copy is False: + raise ValueError( + f"Converting {type(obj)} to usm_ndarray requires a copy" + ) + return _asarray_from_numpy_ndarray( + np.array(obj), + dtype=dtype, + usm_type=usm_type, + sycl_queue=sycl_queue, + order=order, + ) + if isinstance(obj, (list, tuple, range)): + if copy is False: + raise ValueError( + "Converting Python sequence to usm_ndarray requires a copy" + ) + seq_shape, seq_dt, devs = _array_info_sequence(obj) + if devs == _host_set: + return _asarray_from_numpy_ndarray( + np.asarray(obj, dtype=dtype, order=order), + dtype=dtype, + usm_type=usm_type, + sycl_queue=sycl_queue, + order=order, + ) + elif len(devs) == 1: + seq_dev = list(devs)[0] + return _asarray_from_seq_single_device( + obj, + seq_shape, + seq_dt, + seq_dev, + dtype=dtype, + usm_type=usm_type, + sycl_queue=sycl_queue, + order=order, + ) + elif len(devs) > 1: + devs = [dev for dev in devs if dev is not None] + if sycl_queue is None: + if len(devs) == 1: + alloc_q = devs[0] + else: + raise dpctl.utils.ExecutionPlacementError( + "Please specify `device` or `sycl_queue` keyword " + "argument to determine where to allocate the " + "resulting array." + ) + else: + alloc_q = sycl_queue + return _asarray_from_seq( + obj, + seq_shape, + seq_dt, + alloc_q, + # force copying via host + None, + dtype=dtype, + usm_type=usm_type, + order=order, + ) + if copy is False: + raise ValueError( + f"Converting {type(obj)} to usm_ndarray requires a copy" + ) + # obj is a scalar, create 0d array + return _asarray_from_numpy_ndarray( + np.asarray(obj, dtype=dtype), + dtype=dtype, + usm_type=usm_type, + sycl_queue=sycl_queue, + order="C", + ) + + +def empty( + shape, + *, + dtype=None, + order="C", + device=None, + usm_type="device", + sycl_queue=None, +): + """ + Creates :class:`dpctl.tensor.usm_ndarray` from uninitialized + USM allocation. + + Args: + shape (Tuple[int], int): + Dimensions of the array to be created. + dtype (optional): + data type of the array. Can be typestring, + a :class:`numpy.dtype` object, :mod:`numpy` char string, + or a NumPy scalar type. The ``None`` value creates an + array of floating point data type. Default: ``None`` + order (``"C"``, or ``F"``): + memory layout for the array. Default: ``"C"`` + device (optional): array API concept of device where the output array + is created. ``device`` can be ``None``, a oneAPI filter selector + string, an instance of :class:`dpctl.SyclDevice` corresponding to + a non-partitioned SYCL device, an instance of + :class:`dpctl.SyclQueue`, or a :class:`dpctl.tensor.Device` object + returned by :attr:`dpctl.tensor.usm_ndarray.device`. + Default: ``None`` + usm_type (``"device"``, ``"shared"``, ``"host"``, optional): + The type of SYCL USM allocation for the output array. + Default: ``"device"`` + sycl_queue (:class:`dpctl.SyclQueue`, optional): + The SYCL queue to use + for output array allocation and copying. ``sycl_queue`` and + ``device`` are complementary arguments, i.e. use one or another. + If both are specified, a :exc:`TypeError` is raised unless both + imply the same underlying SYCL queue to be used. If both are + ``None``, a cached queue targeting default-selected device is + used for allocation and population. Default: ``None`` + + Returns: + usm_ndarray: + Created empty array. + """ + if not isinstance(order, str) or len(order) == 0 or order[0] not in "CcFf": + raise ValueError( + "Unrecognized order keyword value, expecting 'F' or 'C'." + ) + order = order[0].upper() + dpctl.utils.validate_usm_type(usm_type, allow_none=False) + sycl_queue = normalize_queue_device(sycl_queue=sycl_queue, device=device) + dtype = _get_dtype(dtype, sycl_queue) + _ensure_native_dtype_device_support(dtype, sycl_queue.sycl_device) + res = dpt.usm_ndarray( + shape, + dtype=dtype, + buffer=usm_type, + order=order, + buffer_ctor_kwargs={"queue": sycl_queue}, + ) + return res + + +def empty_like( + x, /, *, dtype=None, order="K", device=None, usm_type=None, sycl_queue=None +): + """ + Returns an uninitialized :class:`dpctl.tensor.usm_ndarray` with the + same `shape` as the input array `x`. + + Args: + x (usm_ndarray): + Input array from which to derive the output array shape. + dtype (optional): + data type of the array. Can be a typestring, + a :class:`numpy.dtype` object, NumPy char string, + or a NumPy scalar type. Default: ``None`` + order ("C", "F", "A", or "K"): + memory layout for the array. Default: ``"K"`` + device (optional): array API concept of device where the output array + is created. ``device`` can be ``None``, a oneAPI filter selector + string, an instance of :class:`dpctl.SyclDevice` corresponding to + a non-partitioned SYCL device, an instance of + :class:`dpctl.SyclQueue`, or a :class:`dpctl.tensor.Device` object + returned by :attr:`dpctl.tensor.usm_ndarray.device`. + Default: ``None`` + usm_type (``"device"``, ``"shared"``, ``"host"``, optional): + The type of SYCL USM allocation for the output array. + Default: ``"device"`` + sycl_queue (:class:`dpctl.SyclQueue`, optional): + The SYCL queue to use + for output array allocation and copying. ``sycl_queue`` and + ``device`` are complementary arguments, i.e. use one or another. + If both are specified, a :exc:`TypeError` is raised unless both + imply the same underlying SYCL queue to be used. If both are + ``None``, a cached queue targeting default-selected device is + used for allocation. Default: ``None`` + + Returns: + usm_ndarray: + Created empty array with uninitialized memory. + """ + if not isinstance(x, dpt.usm_ndarray): + raise TypeError(f"Expected instance of dpt.usm_ndarray, got {type(x)}.") + if ( + not isinstance(order, str) + or len(order) == 0 + or order[0] not in "CcFfAaKk" + ): + raise ValueError( + "Unrecognized order keyword value, expecting 'C', 'F', 'A', or 'K'." + ) + order = order[0].upper() + if dtype is None: + dtype = x.dtype + if usm_type is None: + usm_type = x.usm_type + dpctl.utils.validate_usm_type(usm_type, allow_none=False) + if device is None and sycl_queue is None: + device = x.device + sycl_queue = normalize_queue_device(sycl_queue=sycl_queue, device=device) + dtype = dpt.dtype(dtype) + order = _normalize_order(order, x) + if order == "K": + _ensure_native_dtype_device_support(dtype, sycl_queue.sycl_device) + return _empty_like_orderK(x, dtype, usm_type, sycl_queue) + else: + shape = x.shape + _ensure_native_dtype_device_support(dtype, sycl_queue.sycl_device) + res = dpt.usm_ndarray( + shape, + dtype=dtype, + buffer=usm_type, + order=order, + buffer_ctor_kwargs={"queue": sycl_queue}, + ) + return res + + +def eye( + n_rows, + n_cols=None, + /, + *, + k=0, + dtype=None, + order="C", + device=None, + usm_type="device", + sycl_queue=None, +): + """ + eye(n_rows, n_cols=None, /, *, k=0, dtype=None, \ + device=None, usm_type="device", sycl_queue=None) + + Creates :class:`dpctl.tensor.usm_ndarray` with ones on the `k`-th + diagonal. + + Args: + n_rows (int): + number of rows in the output array. + n_cols (int, optional): + number of columns in the output array. If ``None``, + ``n_cols = n_rows``. Default: ``None`` + k (int): + index of the diagonal, with ``0`` as the main diagonal. + A positive value of ``k`` is a superdiagonal, a negative value + is a subdiagonal. + Raises :exc:`TypeError` if ``k`` is not an integer. + Default: ``0`` + dtype (optional): + data type of the array. Can be typestring, + a :class:`numpy.dtype` object, :mod:`numpy` char string, or + a NumPy scalar type. Default: ``None`` + order ("C" or "F"): + memory layout for the array. Default: ``"C"`` + device (optional): + array API concept of device where the output array + is created. ``device`` can be ``None``, a oneAPI filter selector + string, an instance of :class:`dpctl.SyclDevice` corresponding to + a non-partitioned SYCL device, an instance of + :class:`dpctl.SyclQueue`, or a :class:`dpctl.tensor.Device` object + returned by :attr:`dpctl.tensor.usm_ndarray.device`. + Default: ``None`` + usm_type (``"device"``, ``"shared"``, ``"host"``, optional): + The type of SYCL USM allocation for the output array. + Default: ``"device"`` + sycl_queue (:class:`dpctl.SyclQueue`, optional): + The SYCL queue to use + for output array allocation and copying. ``sycl_queue`` and + ``device`` are complementary arguments, i.e. use one or another. + If both are specified, a :exc:`TypeError` is raised unless both + imply the same underlying SYCL queue to be used. If both are + ``None``, a cached queue targeting default-selected device is + used for allocation and population. Default: ``None`` + + Returns: + usm_ndarray: + A diagonal matrix. + """ + if not isinstance(order, str) or len(order) == 0 or order[0] not in "CcFf": + raise ValueError( + "Unrecognized order keyword value, expecting 'F' or 'C'." + ) + order = order[0].upper() + n_rows = operator.index(n_rows) + n_cols = n_rows if n_cols is None else operator.index(n_cols) + k = operator.index(k) + if k >= n_cols or -k >= n_rows: + return dpt_ext.zeros( + (n_rows, n_cols), + dtype=dtype, + order=order, + device=device, + usm_type=usm_type, + sycl_queue=sycl_queue, + ) + dpctl.utils.validate_usm_type(usm_type, allow_none=False) + sycl_queue = normalize_queue_device(sycl_queue=sycl_queue, device=device) + dtype = _get_dtype(dtype, sycl_queue) + _ensure_native_dtype_device_support(dtype, sycl_queue.sycl_device) + res = dpt.usm_ndarray( + (n_rows, n_cols), + dtype=dtype, + buffer=usm_type, + order=order, + buffer_ctor_kwargs={"queue": sycl_queue}, + ) + if n_rows != 0 and n_cols != 0: + _manager = dpctl.utils.SequentialOrderManager[sycl_queue] + hev, eye_ev = ti._eye(k, dst=res, sycl_queue=sycl_queue) + _manager.add_event_pair(hev, eye_ev) + return res + + +def _validate_fill_value(fill_val): + """Validates that `fill_val` is a numeric or boolean scalar.""" + # TODO: verify if `np.True_` and `np.False_` should be instances of + # Number in NumPy, like other NumPy scalars and like Python bools + # check for `np.bool_` separately as NumPy<2 has no `np.bool` + if not isinstance(fill_val, Number) and not isinstance(fill_val, np.bool_): + raise TypeError( + f"array cannot be filled with scalar of type {type(fill_val)}" + ) + + +def full( + shape, + fill_value, + *, + dtype=None, + order="C", + device=None, + usm_type=None, + sycl_queue=None, +): + """ + Returns a new :class:`dpctl.tensor.usm_ndarray` having a specified + shape and filled with `fill_value`. + + Args: + shape (tuple): + Dimensions of the array to be created. + fill_value (int,float,complex,usm_ndarray): + fill value + dtype (optional): data type of the array. Can be typestring, + a :class:`numpy.dtype` object, :mod:`numpy` char string, + or a NumPy scalar type. Default: ``None`` + order ("C", or "F"): + memory layout for the array. Default: ``"C"`` + device (optional): array API concept of device where the output array + is created. ``device`` can be ``None``, a oneAPI filter selector + string, an instance of :class:`dpctl.SyclDevice` corresponding to + a non-partitioned SYCL device, an instance of + :class:`dpctl.SyclQueue`, or a :class:`dpctl.tensor.Device` object + returned by :attr:`dpctl.tensor.usm_ndarray.device`. + Default: ``None`` + usm_type (``"device"``, ``"shared"``, ``"host"``, optional): + The type of SYCL USM allocation for the output array. + Default: ``"device"`` + sycl_queue (:class:`dpctl.SyclQueue`, optional): + The SYCL queue to use + for output array allocation and copying. ``sycl_queue`` and + ``device`` are complementary arguments, i.e. use one or another. + If both are specified, a :exc:`TypeError` is raised unless both + imply the same underlying SYCL queue to be used. If both are + ``None``, a cached queue targeting default-selected device is + used for allocation and population. Default: ``None`` + + Returns: + usm_ndarray: + New array initialized with given value. + """ + if not isinstance(order, str) or len(order) == 0 or order[0] not in "CcFf": + raise ValueError( + "Unrecognized order keyword value, expecting 'F' or 'C'." + ) + order = order[0].upper() + dpctl.utils.validate_usm_type(usm_type, allow_none=True) + + if isinstance(fill_value, (dpt.usm_ndarray, np.ndarray, tuple, list)): + if ( + isinstance(fill_value, dpt.usm_ndarray) + and sycl_queue is None + and device is None + ): + sycl_queue = fill_value.sycl_queue + else: + sycl_queue = normalize_queue_device( + sycl_queue=sycl_queue, device=device + ) + X = dpt_ext.asarray( + fill_value, + dtype=dtype, + order=order, + usm_type=usm_type, + sycl_queue=sycl_queue, + ) + return dpt_ext.copy(dpt_ext.broadcast_to(X, shape), order=order) + else: + _validate_fill_value(fill_value) + + sycl_queue = normalize_queue_device(sycl_queue=sycl_queue, device=device) + usm_type = usm_type if usm_type is not None else "device" + dtype = _get_dtype(dtype, sycl_queue, ref_type=type(fill_value)) + res = dpt.usm_ndarray( + shape, + dtype=dtype, + buffer=usm_type, + order=order, + buffer_ctor_kwargs={"queue": sycl_queue}, + ) + fill_value = _cast_fill_val(fill_value, dtype) + + _manager = dpctl.utils.SequentialOrderManager[sycl_queue] + # populating new allocation, no dependent events + hev, full_ev = ti._full_usm_ndarray(fill_value, res, sycl_queue) + _manager.add_event_pair(hev, full_ev) + return res + + +def full_like( + x, + /, + fill_value, + *, + dtype=None, + order="K", + device=None, + usm_type=None, + sycl_queue=None, +): + """full_like(x, fill_value, dtype=None, order="K", \ + device=None, usm_type=None, sycl_queue=None) + + Returns a new :class:`dpctl.tensor.usm_ndarray` filled with `fill_value` + and having the same `shape` as the input array `x`. + + Args: + x (usm_ndarray): Input array from which to derive the output array + shape. + fill_value: the value to fill output array with + dtype (optional): + data type of the array. Can be typestring, + a :class:`numpy.dtype` object, :mod:`numpy` char string, or a + NumPy scalar type. If ``dtype`` is ``None``, the output array data + type is inferred from ``x``. Default: ``None`` + order ("C", "F", "A", or "K"): + memory layout for the array. Default: ``"K"`` + device (optional): + array API concept of device where the output array + is created. ``device`` can be ``None``, a oneAPI filter selector + string, an instance of :class:`dpctl.SyclDevice` corresponding to + a non-partitioned SYCL device, an instance of + :class:`dpctl.SyclQueue`, or a :class:`dpctl.tensor.Device` object + returned by :attr:`dpctl.tensor.usm_ndarray.device`. + Default: ``None`` + usm_type (``"device"``, ``"shared"``, ``"host"``, optional): + The type of SYCL USM allocation for the output array. + Default: ``"device"`` + sycl_queue (:class:`dpctl.SyclQueue`, optional): + The SYCL queue to use + for output array allocation and copying. ``sycl_queue`` and + ``device`` are complementary arguments, i.e. use one or another. + If both are specified, a :exc:`TypeError` is raised unless both + imply the same underlying SYCL queue to be used. If both are + ``None``, a cached queue targeting default-selected device is + used for allocation and population. Default: ``None`` + + Returns: + usm_ndarray: + New array initialized with given value. + """ + if not isinstance(x, dpt.usm_ndarray): + raise TypeError(f"Expected instance of dpt.usm_ndarray, got {type(x)}.") + if ( + not isinstance(order, str) + or len(order) == 0 + or order[0] not in "CcFfAaKk" + ): + raise ValueError( + "Unrecognized order keyword value, expecting 'C', 'F', 'A', or 'K'." + ) + order = order[0].upper() + if dtype is None: + dtype = x.dtype + if usm_type is None: + usm_type = x.usm_type + dpctl.utils.validate_usm_type(usm_type, allow_none=False) + if device is None and sycl_queue is None: + device = x.device + sycl_queue = normalize_queue_device(sycl_queue=sycl_queue, device=device) + sh = x.shape + dtype = dpt.dtype(dtype) + order = _normalize_order(order, x) + if order == "K": + _ensure_native_dtype_device_support(dtype, sycl_queue.sycl_device) + if isinstance(fill_value, (dpt.usm_ndarray, np.ndarray, tuple, list)): + X = dpt_ext.asarray( + fill_value, + dtype=dtype, + order=order, + usm_type=usm_type, + sycl_queue=sycl_queue, + ) + X = dpt_ext.broadcast_to(X, sh) + res = _empty_like_orderK(x, dtype, usm_type, sycl_queue) + _manager = dpctl.utils.SequentialOrderManager[sycl_queue] + # order copy after tasks populating X + dep_evs = _manager.submitted_events + hev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=X, dst=res, sycl_queue=sycl_queue, depends=dep_evs + ) + _manager.add_event_pair(hev, copy_ev) + return res + else: + _validate_fill_value(fill_value) + + dtype = _get_dtype(dtype, sycl_queue, ref_type=type(fill_value)) + res = _empty_like_orderK(x, dtype, usm_type, sycl_queue) + fill_value = _cast_fill_val(fill_value, dtype) + _manager = dpctl.utils.SequentialOrderManager[sycl_queue] + # populating new allocation, no dependent events + hev, full_ev = ti._full_usm_ndarray(fill_value, res, sycl_queue) + _manager.add_event_pair(hev, full_ev) + return res + else: + return full( + sh, + fill_value, + dtype=dtype, + order=order, + device=device, + usm_type=usm_type, + sycl_queue=sycl_queue, + ) + + +def linspace( + start, + stop, + /, + num, + *, + dtype=None, + device=None, + endpoint=True, + sycl_queue=None, + usm_type="device", +): + """ + linspace(start, stop, num, dtype=None, device=None, endpoint=True, \ + sycl_queue=None, usm_type="device") + + Returns :class:`dpctl.tensor.usm_ndarray` array populated with + evenly spaced numbers of specified interval. + + Args: + start: + the start of the interval. + stop: + the end of the interval. If the ``endpoint`` is ``False``, the + function generates ``num+1`` evenly spaced points starting + with ``start`` and ending with ``stop`` and exclude the + ``stop`` from the returned array such that the returned array + consists of evenly spaced numbers over the half-open interval + ``[start, stop)``. If ``endpoint`` is ``True``, the output + array consists of evenly spaced numbers over the closed + interval ``[start, stop]``. Default: ``True`` + num (int): + number of samples. Must be a non-negative integer; otherwise, + the function raises ``ValueError`` exception. + dtype: + output array data type. Should be a floating data type. + If ``dtype`` is ``None``, the output array must be the default + floating point data type for target device. + Default: ``None`` + device (optional): + array API concept of device where the output array + is created. ``device`` can be ``None``, a oneAPI filter selector + string, an instance of :class:`dpctl.SyclDevice` corresponding to + a non-partitioned SYCL device, an instance of + :class:`dpctl.SyclQueue`, or a :class:`dpctl.tensor.Device` object + returned by :attr:`dpctl.tensor.usm_ndarray.device`. + Default: ``None`` + usm_type (``"device"``, ``"shared"``, ``"host"``, optional): + The type of SYCL USM allocation for the output array. + Default: ``"device"`` + sycl_queue (:class:`dpctl.SyclQueue`, optional): + The SYCL queue to use + for output array allocation and copying. ``sycl_queue`` and + ``device`` are complementary arguments, i.e. use one or another. + If both are specified, a :exc:`TypeError` is raised unless both + imply the same underlying SYCL queue to be used. If both are + ``None``, a cached queue targeting default-selected device is + used for allocation and population. Default: ``None`` + endpoint: boolean indicating whether to include ``stop`` in the + interval. Default: ``True`` + + Returns: + usm_ndarray: + Array populated with evenly spaced numbers in the requested + interval. + """ + sycl_queue = normalize_queue_device(sycl_queue=sycl_queue, device=device) + dpctl.utils.validate_usm_type(usm_type, allow_none=False) + if endpoint not in [True, False]: + raise TypeError("endpoint keyword argument must be of boolean type") + + num = operator.index(num) + if num < 0: + raise ValueError("Number of points must be non-negative") + + _, dt = _coerce_and_infer_dt( + start, + stop, + dt=dtype, + sycl_queue=sycl_queue, + err_msg="start and stop must be Python scalars.", + allow_bool=True, + ) + + int_dt = None + if np.issubdtype(dt, np.integer): + if dtype is not None: + int_dt = dt + dt = ti.default_device_fp_type(sycl_queue) + dt = dpt.dtype(dt) + start = float(start) + stop = float(stop) + + res = dpt_ext.empty(num, dtype=dt, usm_type=usm_type, sycl_queue=sycl_queue) + _manager = dpctl.utils.SequentialOrderManager[sycl_queue] + hev, la_ev = ti._linspace_affine( + start, stop, dst=res, include_endpoint=endpoint, sycl_queue=sycl_queue + ) + _manager.add_event_pair(hev, la_ev) + + return res if int_dt is None else dpt.astype(res, int_dt) + + +def meshgrid(*arrays, indexing="xy"): + """ + Creates list of :class:`dpctl.tensor.usm_ndarray` coordinate matrices + from vectors. + + Args: + arrays (usm_ndarray): + an arbitrary number of one-dimensional arrays + representing grid coordinates. Each array should have the same + numeric data type. + indexing (``"xy"``, or ``"ij"``): + Cartesian (``"xy"``) or matrix (``"ij"``) indexing of output. + If provided zero or one one-dimensional vector(s) (i.e., the + zero- and one-dimensional cases, respectively), the ``indexing`` + keyword has no effect and should be ignored. Default: ``"xy"`` + + Returns: + List[array]: + list of ``N`` arrays, where ``N`` is the number of + provided one-dimensional input arrays. Each returned array must + have rank ``N``. + For a set of ``n`` vectors with lengths ``N0``, ``N1``, ``N2``, ... + The cartesian indexing results in arrays of shape + ``(N1, N0, N2, ...)``, while the + matrix indexing results in arrays of shape + ``(N0, N1, N2, ...)``. + Default: ``"xy"``. + + Raises: + ValueError: If vectors are not of the same data type, or are not + one-dimensional. + + """ + ref_dt = None + ref_unset = True + for array in arrays: + if not isinstance(array, dpt.usm_ndarray): + raise TypeError( + f"Expected instance of dpt.usm_ndarray, got {type(array)}." + ) + if array.ndim != 1: + raise ValueError("All arrays must be one-dimensional.") + if ref_unset: + ref_unset = False + ref_dt = array.dtype + else: + if not ref_dt == array.dtype: + raise ValueError( + "All arrays must be of the same numeric data type." + ) + if indexing not in ["xy", "ij"]: + raise ValueError( + "Unrecognized indexing keyword value, expecting 'xy' or 'ij.'" + ) + n = len(arrays) + if n == 0: + return [] + + sh = (-1,) + (1,) * (n - 1) + + res = [] + if n > 1 and indexing == "xy": + res.append(dpt_ext.reshape(arrays[0], (1, -1) + sh[2:], copy=True)) + res.append(dpt_ext.reshape(arrays[1], sh, copy=True)) + arrays, sh = arrays[2:], sh[-2:] + sh[:-2] + + for array in arrays: + res.append(dpt_ext.reshape(array, sh, copy=True)) + sh = sh[-1:] + sh[:-1] + + output = dpt_ext.broadcast_arrays(*res) + return output -def eye( - n_rows, - n_cols=None, - /, + +def ones( + shape, *, - k=0, dtype=None, order="C", device=None, usm_type="device", sycl_queue=None, ): - """ - eye(n_rows, n_cols=None, /, *, k=0, dtype=None, \ - device=None, usm_type="device", sycl_queue=None) + """ones(shape, dtype=None, order="C", \ + device=None, usm_type="device", sycl_queue=None) - Creates :class:`dpctl.tensor.usm_ndarray` with ones on the `k`-th - diagonal. + Returns a new :class:`dpctl.tensor.usm_ndarray` having a specified + shape and filled with ones. Args: - n_rows (int): - number of rows in the output array. - n_cols (int, optional): - number of columns in the output array. If ``None``, - ``n_cols = n_rows``. Default: ``None`` - k (int): - index of the diagonal, with ``0`` as the main diagonal. - A positive value of ``k`` is a superdiagonal, a negative value - is a subdiagonal. - Raises :exc:`TypeError` if ``k`` is not an integer. - Default: ``0`` + shape (Tuple[int], int): + Dimensions of the array to be created. dtype (optional): data type of the array. Can be typestring, - a :class:`numpy.dtype` object, :mod:`numpy` char string, or - a NumPy scalar type. Default: ``None`` - order ("C" or "F"): - memory layout for the array. Default: ``"C"`` - device (optional): - array API concept of device where the output array + a :class:`numpy.dtype` object, :mod:`numpy` char string, + or a NumPy scalar type. Default: ``None`` + order ("C", or "F"): memory layout for the array. Default: ``"C"`` + device (optional): array API concept of device where the output array is created. ``device`` can be ``None``, a oneAPI filter selector string, an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, an instance of @@ -158,79 +1562,48 @@ def eye( Returns: usm_ndarray: - A diagonal matrix. + Created array initialized with ones. """ if not isinstance(order, str) or len(order) == 0 or order[0] not in "CcFf": raise ValueError( "Unrecognized order keyword value, expecting 'F' or 'C'." ) order = order[0].upper() - n_rows = operator.index(n_rows) - n_cols = n_rows if n_cols is None else operator.index(n_cols) - k = operator.index(k) - if k >= n_cols or -k >= n_rows: - return dpt.zeros( - (n_rows, n_cols), - dtype=dtype, - order=order, - device=device, - usm_type=usm_type, - sycl_queue=sycl_queue, - ) dpctl.utils.validate_usm_type(usm_type, allow_none=False) sycl_queue = normalize_queue_device(sycl_queue=sycl_queue, device=device) dtype = _get_dtype(dtype, sycl_queue) - _ensure_native_dtype_device_support(dtype, sycl_queue.sycl_device) res = dpt.usm_ndarray( - (n_rows, n_cols), + shape, dtype=dtype, buffer=usm_type, order=order, buffer_ctor_kwargs={"queue": sycl_queue}, ) - if n_rows != 0 and n_cols != 0: - _manager = dpctl.utils.SequentialOrderManager[sycl_queue] - hev, eye_ev = ti._eye(k, dst=res, sycl_queue=sycl_queue) - _manager.add_event_pair(hev, eye_ev) + _manager = dpctl.utils.SequentialOrderManager[sycl_queue] + # populating new allocation, no dependent events + hev, full_ev = ti._full_usm_ndarray(1, res, sycl_queue) + _manager.add_event_pair(hev, full_ev) return res -def _validate_fill_value(fill_val): - """Validates that `fill_val` is a numeric or boolean scalar.""" - # TODO: verify if `np.True_` and `np.False_` should be instances of - # Number in NumPy, like other NumPy scalars and like Python bools - # check for `np.bool_` separately as NumPy<2 has no `np.bool` - if not isinstance(fill_val, Number) and not isinstance(fill_val, np.bool_): - raise TypeError( - f"array cannot be filled with scalar of type {type(fill_val)}" - ) - - -def full( - shape, - fill_value, - *, - dtype=None, - order="C", - device=None, - usm_type=None, - sycl_queue=None, +def ones_like( + x, /, *, dtype=None, order="K", device=None, usm_type=None, sycl_queue=None ): """ - Returns a new :class:`dpctl.tensor.usm_ndarray` having a specified - shape and filled with `fill_value`. + Returns a new :class:`dpctl.tensor.usm_ndarray` filled with ones and + having the same `shape` as the input array `x`. Args: - shape (tuple): - Dimensions of the array to be created. - fill_value (int,float,complex,usm_ndarray): - fill value - dtype (optional): data type of the array. Can be typestring, + x (usm_ndarray): + Input array from which to derive the output array shape + dtype (optional): + data type of the array. Can be typestring, a :class:`numpy.dtype` object, :mod:`numpy` char string, - or a NumPy scalar type. Default: ``None`` - order ("C", or "F"): + or a NumPy scalar type. Default: `None` + order ("C", "F", "A", or "K"): memory layout for the array. Default: ``"C"`` - device (optional): array API concept of device where the output array + device (optional): + array API concept of device where the output array is created. ``device`` can be ``None``, a oneAPI filter selector string, an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, an instance of @@ -251,54 +1624,47 @@ def full( Returns: usm_ndarray: - New array initialized with given value. + New array initialized with ones. """ - if not isinstance(order, str) or len(order) == 0 or order[0] not in "CcFf": + if not isinstance(x, dpt.usm_ndarray): + raise TypeError(f"Expected instance of dpt.usm_ndarray, got {type(x)}.") + if ( + not isinstance(order, str) + or len(order) == 0 + or order[0] not in "CcFfAaKk" + ): raise ValueError( - "Unrecognized order keyword value, expecting 'F' or 'C'." + "Unrecognized order keyword value, expecting 'C', 'F', 'A', or 'K'." ) order = order[0].upper() - dpctl.utils.validate_usm_type(usm_type, allow_none=True) - - if isinstance(fill_value, (dpt.usm_ndarray, np.ndarray, tuple, list)): - if ( - isinstance(fill_value, dpt.usm_ndarray) - and sycl_queue is None - and device is None - ): - sycl_queue = fill_value.sycl_queue - else: - sycl_queue = normalize_queue_device( - sycl_queue=sycl_queue, device=device - ) - X = dpt.asarray( - fill_value, + if dtype is None: + dtype = x.dtype + if usm_type is None: + usm_type = x.usm_type + dpctl.utils.validate_usm_type(usm_type, allow_none=False) + if device is None and sycl_queue is None: + device = x.device + sycl_queue = normalize_queue_device(sycl_queue=sycl_queue, device=device) + dtype = dpt.dtype(dtype) + order = _normalize_order(order, x) + if order == "K": + _ensure_native_dtype_device_support(dtype, sycl_queue.sycl_device) + res = _empty_like_orderK(x, dtype, usm_type, sycl_queue) + _manager = dpctl.utils.SequentialOrderManager[sycl_queue] + # populating new allocation, no dependent events + hev, full_ev = ti._full_usm_ndarray(1, res, sycl_queue) + _manager.add_event_pair(hev, full_ev) + return res + else: + sh = x.shape + return ones( + sh, dtype=dtype, order=order, + device=device, usm_type=usm_type, sycl_queue=sycl_queue, ) - return dpt_ext.copy(dpt.broadcast_to(X, shape), order=order) - else: - _validate_fill_value(fill_value) - - sycl_queue = normalize_queue_device(sycl_queue=sycl_queue, device=device) - usm_type = usm_type if usm_type is not None else "device" - dtype = _get_dtype(dtype, sycl_queue, ref_type=type(fill_value)) - res = dpt.usm_ndarray( - shape, - dtype=dtype, - buffer=usm_type, - order=order, - buffer_ctor_kwargs={"queue": sycl_queue}, - ) - fill_value = _cast_fill_val(fill_value, dtype) - - _manager = dpctl.utils.SequentialOrderManager[sycl_queue] - # populating new allocation, no dependent events - hev, full_ev = ti._full_usm_ndarray(fill_value, res, sycl_queue) - _manager.add_event_pair(hev, full_ev) - return res def tril(x, /, *, k=0): @@ -340,7 +1706,7 @@ def tril(x, /, *, k=0): q = x.sycl_queue if k >= shape[nd - 1] - 1: - res = dpt.empty( + res = dpt_ext.empty( x.shape, dtype=x.dtype, order=order, @@ -354,7 +1720,7 @@ def tril(x, /, *, k=0): ) _manager.add_event_pair(hev, cpy_ev) elif k < -shape[nd - 2]: - res = dpt.zeros( + res = dpt_ext.zeros( x.shape, dtype=x.dtype, order=order, @@ -362,7 +1728,7 @@ def tril(x, /, *, k=0): sycl_queue=q, ) else: - res = dpt.empty( + res = dpt_ext.empty( x.shape, dtype=x.dtype, order=order, @@ -418,7 +1784,7 @@ def triu(x, /, *, k=0): q = x.sycl_queue if k > shape[nd - 1]: - res = dpt.zeros( + res = dpt_ext.zeros( x.shape, dtype=x.dtype, order=order, @@ -426,7 +1792,7 @@ def triu(x, /, *, k=0): sycl_queue=q, ) elif k <= -shape[nd - 2] + 1: - res = dpt.empty( + res = dpt_ext.empty( x.shape, dtype=x.dtype, order=order, @@ -440,7 +1806,7 @@ def triu(x, /, *, k=0): ) _manager.add_event_pair(hev, cpy_ev) else: - res = dpt.empty( + res = dpt_ext.empty( x.shape, dtype=x.dtype, order=order, @@ -455,3 +1821,156 @@ def triu(x, /, *, k=0): _manager.add_event_pair(hev, triu_ev) return res + + +def zeros( + shape, + *, + dtype=None, + order="C", + device=None, + usm_type="device", + sycl_queue=None, +): + """ + Returns a new :class:`dpctl.tensor.usm_ndarray` having a specified + shape and filled with zeros. + + Args: + shape (Tuple[int], int): + Dimensions of the array to be created. + dtype (optional): + data type of the array. Can be typestring, + a :class:`numpy.dtype` object, :mod:`numpy` char string, + or a NumPy scalar type. Default: ``None`` + order ("C", or "F"): + memory layout for the array. Default: ``"C"`` + device (optional): array API concept of device where the output array + is created. ``device`` can be ``None``, a oneAPI filter selector + string, an instance of :class:`dpctl.SyclDevice` corresponding to + a non-partitioned SYCL device, an instance of + :class:`dpctl.SyclQueue`, or a :class:`dpctl.tensor.Device` object + returned by :attr:`dpctl.tensor.usm_ndarray.device`. + Default: ``None`` + usm_type (``"device"``, ``"shared"``, ``"host"``, optional): + The type of SYCL USM allocation for the output array. + Default: ``"device"`` + sycl_queue (:class:`dpctl.SyclQueue`, optional): + The SYCL queue to use + for output array allocation and copying. ``sycl_queue`` and + ``device`` are complementary arguments, i.e. use one or another. + If both are specified, a :exc:`TypeError` is raised unless both + imply the same underlying SYCL queue to be used. If both are + ``None``, a cached queue targeting default-selected device is + used for allocation and population. Default: ``None`` + + Returns: + usm_ndarray: + Constructed array initialized with zeros. + """ + if not isinstance(order, str) or len(order) == 0 or order[0] not in "CcFf": + raise ValueError( + "Unrecognized order keyword value, expecting 'F' or 'C'." + ) + order = order[0].upper() + dpctl.utils.validate_usm_type(usm_type, allow_none=False) + sycl_queue = normalize_queue_device(sycl_queue=sycl_queue, device=device) + dtype = _get_dtype(dtype, sycl_queue) + _ensure_native_dtype_device_support(dtype, sycl_queue.sycl_device) + res = dpt.usm_ndarray( + shape, + dtype=dtype, + buffer=usm_type, + order=order, + buffer_ctor_kwargs={"queue": sycl_queue}, + ) + _manager = dpctl.utils.SequentialOrderManager[sycl_queue] + # populating new allocation, no dependent events + hev, zeros_ev = ti._zeros_usm_ndarray(res, sycl_queue) + _manager.add_event_pair(hev, zeros_ev) + + return res + + +def zeros_like( + x, /, *, dtype=None, order="K", device=None, usm_type=None, sycl_queue=None +): + """ + Creates :class:`dpctl.tensor.usm_ndarray` from USM allocation + initialized with zeros. + + Args: + x (usm_ndarray): + Input array from which to derive the shape of the + output array. + dtype (optional): + data type of the array. Can be typestring, + a :class:`numpy.dtype` object, :mod:`numpy` char string, or a + NumPy scalar type. If `None`, output array has the same data + type as the input array. Default: ``None`` + order ("C", or "F"): + memory layout for the array. Default: ``"C"`` + device (optional): + array API concept of device where the output array + is created. ``device`` can be ``None``, a oneAPI filter selector + string, an instance of :class:`dpctl.SyclDevice` corresponding to + a non-partitioned SYCL device, an instance of + :class:`dpctl.SyclQueue`, or a :class:`dpctl.tensor.Device` object + returned by :attr:`dpctl.tensor.usm_ndarray.device`. + Default: ``None`` + usm_type (``"device"``, ``"shared"``, ``"host"``, optional): + The type of SYCL USM allocation for the output array. + Default: ``"device"`` + sycl_queue (:class:`dpctl.SyclQueue`, optional): + The SYCL queue to use + for output array allocation and copying. ``sycl_queue`` and + ``device`` are complementary arguments, i.e. use one or another. + If both are specified, a :exc:`TypeError` is raised unless both + imply the same underlying SYCL queue to be used. If both are + ``None``, a cached queue targeting default-selected device is + used for allocation and population. Default: ``None`` + + Returns: + usm_ndarray: + New array initialized with zeros. + """ + if not isinstance(x, dpt.usm_ndarray): + raise TypeError(f"Expected instance of dpt.usm_ndarray, got {type(x)}.") + if ( + not isinstance(order, str) + or len(order) == 0 + or order[0] not in "CcFfAaKk" + ): + raise ValueError( + "Unrecognized order keyword value, expecting 'C', 'F', 'A', or 'K'." + ) + order = order[0].upper() + if dtype is None: + dtype = x.dtype + if usm_type is None: + usm_type = x.usm_type + dpctl.utils.validate_usm_type(usm_type, allow_none=False) + if device is None and sycl_queue is None: + device = x.device + sycl_queue = normalize_queue_device(sycl_queue=sycl_queue, device=device) + dtype = dpt.dtype(dtype) + order = _normalize_order(order, x) + if order == "K": + _ensure_native_dtype_device_support(dtype, sycl_queue.sycl_device) + res = _empty_like_orderK(x, dtype, usm_type, sycl_queue) + _manager = dpctl.utils.SequentialOrderManager[sycl_queue] + # populating new allocation, no dependent events + hev, full_ev = ti._full_usm_ndarray(0, res, sycl_queue) + _manager.add_event_pair(hev, full_ev) + return res + else: + _ensure_native_dtype_device_support(dtype, sycl_queue.sycl_device) + sh = x.shape + return zeros( + sh, + dtype=dtype, + order=order, + device=device, + usm_type=usm_type, + sycl_queue=sycl_queue, + ) diff --git a/dpctl_ext/tensor/_indexing_functions.py b/dpctl_ext/tensor/_indexing_functions.py index 6ca327192f7..91ffc759a92 100644 --- a/dpctl_ext/tensor/_indexing_functions.py +++ b/dpctl_ext/tensor/_indexing_functions.py @@ -57,7 +57,7 @@ def _get_indexing_mode(name): def _range(sh_i, i, nd, q, usm_t, dt): - ind = dpt.arange(sh_i, dtype=dt, usm_type=usm_t, sycl_queue=q) + ind = dpt_ext.arange(sh_i, dtype=dt, usm_type=usm_t, sycl_queue=q) ind.shape = tuple(sh_i if i == j else 1 for j in range(nd)) return ind @@ -177,7 +177,7 @@ def place(arr, mask, vals): raise dpctl.utils.ExecutionPlacementError if arr.shape != mask.shape or vals.ndim != 1: raise ValueError("Array sizes are not as required") - cumsum = dpt.empty(mask.size, dtype="i8", sycl_queue=exec_q) + cumsum = dpt_ext.empty(mask.size, dtype="i8", sycl_queue=exec_q) _manager = dpctl.utils.SequentialOrderManager[exec_q] deps_ev = _manager.submitted_events nz_count = ti.mask_positions( @@ -329,7 +329,7 @@ def put_vec_duplicates(vec, ind, vals): val_shape = indices.shape if not isinstance(vals, dpt.usm_ndarray): - vals = dpt.asarray( + vals = dpt_ext.asarray( vals, dtype=x.dtype, usm_type=vals_usm_type, sycl_queue=exec_q ) # choose to throw here for consistency with `place` @@ -341,7 +341,7 @@ def put_vec_duplicates(vec, ind, vals): rhs = vals else: rhs = dpt_ext.astype(vals, x.dtype) - rhs = dpt.broadcast_to(rhs, val_shape) + rhs = dpt_ext.broadcast_to(rhs, val_shape) _manager = dpctl.utils.SequentialOrderManager[exec_q] deps_ev = _manager.submitted_events @@ -540,9 +540,9 @@ def take(x, indices, /, *, axis=None, out=None, mode="wrap"): "Input and output allocation queues are not compatible" ) if ti._array_overlap(x, out): - out = dpt.empty_like(out) + out = dpt_ext.empty_like(out) else: - out = dpt.empty( + out = dpt_ext.empty( res_shape, dtype=dt, usm_type=res_usm_type, sycl_queue=exec_q ) diff --git a/dpctl_ext/tensor/_manipulation_functions.py b/dpctl_ext/tensor/_manipulation_functions.py index f1b8b46dbcb..08459dcaea7 100644 --- a/dpctl_ext/tensor/_manipulation_functions.py +++ b/dpctl_ext/tensor/_manipulation_functions.py @@ -40,6 +40,7 @@ import dpctl_ext.tensor._tensor_impl as ti from ._numpy_helper import normalize_axis_index, normalize_axis_tuple +from ._type_utils import _supported_dtype, _to_device_supported_dtype __doc__ = ( "Implementation module for array manipulation " @@ -47,6 +48,55 @@ ) +def _arrays_validation(arrays, check_ndim=True): + n = len(arrays) + if n == 0: + raise TypeError("Missing 1 required positional argument: 'arrays'.") + + if not isinstance(arrays, (list, tuple)): + raise TypeError(f"Expected tuple or list type, got {type(arrays)}.") + + for X in arrays: + if not isinstance(X, dpt.usm_ndarray): + raise TypeError(f"Expected usm_ndarray type, got {type(X)}.") + + exec_q = dputils.get_execution_queue([X.sycl_queue for X in arrays]) + if exec_q is None: + raise ValueError("All the input arrays must have same sycl queue.") + + res_usm_type = dputils.get_coerced_usm_type([X.usm_type for X in arrays]) + if res_usm_type is None: + raise ValueError("All the input arrays must have usm_type.") + + X0 = arrays[0] + _supported_dtype(Xi.dtype for Xi in arrays) + + res_dtype = X0.dtype + dev = exec_q.sycl_device + for i in range(1, n): + res_dtype = np.promote_types(res_dtype, arrays[i]) + res_dtype = _to_device_supported_dtype(res_dtype, dev) + + if check_ndim: + for i in range(1, n): + if X0.ndim != arrays[i].ndim: + raise ValueError( + "All the input arrays must have same number of dimensions, " + f"but the array at index 0 has {X0.ndim} dimension(s) and " + f"the array at index {i} has {arrays[i].ndim} dimension(s)." + ) + return res_dtype, res_usm_type, exec_q + + +def _broadcast_shapes(*args): + """ + Broadcast the input shapes into a single shape; + returns tuple broadcasted shape. + """ + array_shapes = [array.shape for array in args] + return _broadcast_shape_impl(array_shapes) + + def _broadcast_shape_impl(shapes): if len(set(shapes)) == 1: return shapes[0] @@ -86,6 +136,395 @@ def _broadcast_shape_impl(shapes): return tuple(common_shape) +def _broadcast_strides(X_shape, X_strides, res_ndim): + """ + Broadcasts strides to match the given dimensions; + returns tuple type strides. + """ + out_strides = [0] * res_ndim + X_shape_len = len(X_shape) + str_dim = -X_shape_len + for i in range(X_shape_len): + shape_value = X_shape[i] + if not shape_value == 1: + out_strides[str_dim] = X_strides[i] + str_dim += 1 + + return tuple(out_strides) + + +def _check_same_shapes(X0_shape, axis, n, arrays): + for i in range(1, n): + Xi_shape = arrays[i].shape + for j, X0j in enumerate(X0_shape): + if X0j != Xi_shape[j] and j != axis: + raise ValueError( + "All the input array dimensions for the concatenation " + f"axis must match exactly, but along dimension {j}, the " + f"array at index 0 has size {X0j} and the array " + f"at index {i} has size {Xi_shape[j]}." + ) + + +def _concat_axis_None(arrays): + """Implementation of concat(arrays, axis=None).""" + res_dtype, res_usm_type, exec_q = _arrays_validation( + arrays, check_ndim=False + ) + res_shape = 0 + for array in arrays: + res_shape += array.size + res = dpt_ext.empty( + res_shape, dtype=res_dtype, usm_type=res_usm_type, sycl_queue=exec_q + ) + + fill_start = 0 + _manager = dputils.SequentialOrderManager[exec_q] + deps = _manager.submitted_events + for array in arrays: + fill_end = fill_start + array.size + if array.flags.c_contiguous: + hev, cpy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=dpt_ext.reshape(array, -1), + dst=res[fill_start:fill_end], + sycl_queue=exec_q, + depends=deps, + ) + _manager.add_event_pair(hev, cpy_ev) + else: + src_ = array + # _copy_usm_ndarray_for_reshape requires src and dst to have + # the same data type + if not array.dtype == res_dtype: + src2_ = dpt_ext.empty_like(src_, dtype=res_dtype) + ht_copy_ev, cpy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=src_, dst=src2_, sycl_queue=exec_q, depends=deps + ) + _manager.add_event_pair(ht_copy_ev, cpy_ev) + hev, reshape_copy_ev = ti._copy_usm_ndarray_for_reshape( + src=src2_, + dst=res[fill_start:fill_end], + sycl_queue=exec_q, + depends=[cpy_ev], + ) + _manager.add_event_pair(hev, reshape_copy_ev) + else: + hev, cpy_ev = ti._copy_usm_ndarray_for_reshape( + src=src_, + dst=res[fill_start:fill_end], + sycl_queue=exec_q, + depends=deps, + ) + _manager.add_event_pair(hev, cpy_ev) + fill_start = fill_end + + return res + + +def broadcast_arrays(*args): + """broadcast_arrays(*arrays) + + Broadcasts one or more :class:`dpctl.tensor.usm_ndarrays` against + one another. + + Args: + arrays (usm_ndarray): an arbitrary number of arrays to be + broadcasted. + + Returns: + List[usm_ndarray]: + A list of broadcasted arrays. Each array + must have the same shape. Each array must have the same `dtype`, + `device` and `usm_type` attributes as its corresponding input + array. + """ + if len(args) == 0: + raise ValueError("`broadcast_arrays` requires at least one argument") + for X in args: + if not isinstance(X, dpt.usm_ndarray): + raise TypeError(f"Expected usm_ndarray type, got {type(X)}.") + + shape = _broadcast_shapes(*args) + + if all(X.shape == shape for X in args): + return args + + return [broadcast_to(X, shape) for X in args] + + +def broadcast_to(X, /, shape): + """broadcast_to(x, shape) + + Broadcast an array to a new `shape`; returns the broadcasted + :class:`dpctl.tensor.usm_ndarray` as a view. + + Args: + x (usm_ndarray): input array + shape (Tuple[int,...]): array shape. The `shape` must be + compatible with `x` according to broadcasting rules. + + Returns: + usm_ndarray: + An array with the specified `shape`. + The output array is a view of the input array, and + hence has the same data type, USM allocation type and + device attributes. + """ + if not isinstance(X, dpt.usm_ndarray): + raise TypeError(f"Expected usm_ndarray type, got {type(X)}.") + + # Use numpy.broadcast_to to check the validity of the input + # parameter 'shape'. Raise ValueError if 'X' is not compatible + # with 'shape' according to NumPy's broadcasting rules. + new_array = np.broadcast_to( + np.broadcast_to(np.empty(tuple(), dtype="u1"), X.shape), shape + ) + new_sts = _broadcast_strides(X.shape, X.strides, new_array.ndim) + return dpt.usm_ndarray( + shape=new_array.shape, + dtype=X.dtype, + buffer=X, + strides=new_sts, + offset=X._element_offset, + ) + + +def concat(arrays, /, *, axis=0): + """concat(arrays, axis) + + Joins a sequence of arrays along an existing axis. + + Args: + arrays (Union[List[usm_ndarray, Tuple[usm_ndarray,...]]]): + input arrays to join. The arrays must have the same shape, + except in the dimension specified by `axis`. + axis (Optional[int]): axis along which the arrays will be joined. + If `axis` is `None`, arrays must be flattened before + concatenation. If `axis` is negative, it is understood as + being counted from the last dimension. Default: `0`. + + Returns: + usm_ndarray: + An output array containing the concatenated + values. The output array data type is determined by Type + Promotion Rules of array API. + + All input arrays must have the same device attribute. The output array + is allocated on that same device, and data movement operations are + scheduled on a queue underlying the device. The USM allocation type + of the output array is determined by USM allocation type promotion + rules. + """ + if axis is None: + return _concat_axis_None(arrays) + + res_dtype, res_usm_type, exec_q = _arrays_validation(arrays) + n = len(arrays) + X0 = arrays[0] + + axis = normalize_axis_index(axis, X0.ndim) + X0_shape = X0.shape + _check_same_shapes(X0_shape, axis, n, arrays) + + res_shape_axis = 0 + for X in arrays: + res_shape_axis = res_shape_axis + X.shape[axis] + + res_shape = tuple( + X0_shape[i] if i != axis else res_shape_axis for i in range(X0.ndim) + ) + + res = dpt_ext.empty( + res_shape, dtype=res_dtype, usm_type=res_usm_type, sycl_queue=exec_q + ) + + _manager = dputils.SequentialOrderManager[exec_q] + deps = _manager.submitted_events + fill_start = 0 + for i in range(n): + fill_end = fill_start + arrays[i].shape[axis] + c_shapes_copy = tuple( + np.s_[fill_start:fill_end] if j == axis else np.s_[:] + for j in range(X0.ndim) + ) + hev, cpy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=arrays[i], + dst=res[c_shapes_copy], + sycl_queue=exec_q, + depends=deps, + ) + _manager.add_event_pair(hev, cpy_ev) + fill_start = fill_end + + return res + + +def expand_dims(X, /, *, axis=0): + """expand_dims(x, axis) + + Expands the shape of an array by inserting a new axis (dimension) + of size one at the position specified by axis. + + Args: + x (usm_ndarray): + input array + axis (Union[int, Tuple[int]]): + axis position in the expanded axes (zero-based). If `x` has rank + (i.e, number of dimensions) `N`, a valid `axis` must reside + in the closed-interval `[-N-1, N]`. If provided a negative + `axis`, the `axis` position at which to insert a singleton + dimension is computed as `N + axis + 1`. Hence, if + provided `-1`, the resolved axis position is `N` (i.e., + a singleton dimension must be appended to the input array `x`). + If provided `-N-1`, the resolved axis position is `0` (i.e., a + singleton dimension is prepended to the input array `x`). + + Returns: + usm_ndarray: + Returns a view, if possible, and a copy otherwise with the number + of dimensions increased. + The expanded array has the same data type as the input array `x`. + The expanded array is located on the same device as the input + array, and has the same USM allocation type. + + Raises: + IndexError: if `axis` value is invalid. + """ + if not isinstance(X, dpt.usm_ndarray): + raise TypeError(f"Expected usm_ndarray type, got {type(X)}.") + + if type(axis) not in (tuple, list): + axis = (axis,) + + out_ndim = len(axis) + X.ndim + axis = normalize_axis_tuple(axis, out_ndim) + + shape_it = iter(X.shape) + shape = tuple(1 if ax in axis else next(shape_it) for ax in range(out_ndim)) + + return dpt_ext.reshape(X, shape) + + +def flip(X, /, *, axis=None): + """flip(x, axis) + + Reverses the order of elements in an array `x` along the given `axis`. + The shape of the array is preserved, but the elements are reordered. + + Args: + x (usm_ndarray): input array. + axis (Optional[Union[int, Tuple[int,...]]]): axis (or axes) along + which to flip. + If `axis` is `None`, all input array axes are flipped. + If `axis` is negative, the flipped axis is counted from the + last dimension. If provided more than one axis, only the specified + axes are flipped. Default: `None`. + + Returns: + usm_ndarray: + A view of `x` with the entries of `axis` reversed. + """ + if not isinstance(X, dpt.usm_ndarray): + raise TypeError(f"Expected usm_ndarray type, got {type(X)}.") + X_ndim = X.ndim + if axis is None: + indexer = (np.s_[::-1],) * X_ndim + else: + axis = normalize_axis_tuple(axis, X_ndim) + indexer = tuple( + np.s_[::-1] if i in axis else np.s_[:] for i in range(X.ndim) + ) + return X[indexer] + + +def moveaxis(X, source, destination, /): + """moveaxis(x, source, destination) + + Moves axes of an array to new positions. + + Args: + x (usm_ndarray): input array + + source (int or a sequence of int): + Original positions of the axes to move. + These must be unique. If `x` has rank (i.e., number of + dimensions) `N`, a valid `axis` must be in the + half-open interval `[-N, N)`. + + destination (int or a sequence of int): + Destination positions for each of the original axes. + These must also be unique. If `x` has rank + (i.e., number of dimensions) `N`, a valid `axis` must be + in the half-open interval `[-N, N)`. + + Returns: + usm_ndarray: + Array with moved axes. + The returned array must has the same data type as `x`, + is created on the same device as `x` and has the same + USM allocation type as `x`. + + Raises: + AxisError: if `axis` value is invalid. + ValueError: if `src` and `dst` have not equal number of elements. + """ + if not isinstance(X, dpt.usm_ndarray): + raise TypeError(f"Expected usm_ndarray type, got {type(X)}.") + + source = normalize_axis_tuple(source, X.ndim, "source") + destination = normalize_axis_tuple(destination, X.ndim, "destination") + + if len(source) != len(destination): + raise ValueError( + "`source` and `destination` arguments must have " + "the same number of elements" + ) + + ind = [n for n in range(X.ndim) if n not in source] + + for src, dst in sorted(zip(destination, source)): + ind.insert(src, dst) + + return dpt_ext.permute_dims(X, tuple(ind)) + + +def permute_dims(X, /, axes): + """permute_dims(x, axes) + + Permute the axes (dimensions) of an array; returns the permuted + array as a view. + + Args: + x (usm_ndarray): input array. + axes (Tuple[int, ...]): tuple containing permutation of + `(0,1,...,N-1)` where `N` is the number of axes (dimensions) + of `x`. + Returns: + usm_ndarray: + An array with permuted axes. + The returned array must has the same data type as `x`, + is created on the same device as `x` and has the same USM allocation + type as `x`. + """ + if not isinstance(X, dpt.usm_ndarray): + raise TypeError(f"Expected usm_ndarray type, got {type(X)}.") + axes = normalize_axis_tuple(axes, X.ndim, "axes") + if not X.ndim == len(axes): + raise ValueError( + "The length of the passed axes does not match " + "to the number of usm_ndarray dimensions." + ) + newstrides = tuple(X.strides[i] for i in axes) + newshape = tuple(X.shape[i] for i in axes) + return dpt.usm_ndarray( + shape=newshape, + dtype=X.dtype, + buffer=X, + strides=newstrides, + offset=X._element_offset, + ) + + def repeat(x, repeats, /, *, axis=None): """repeat(x, repeats, axis=None) @@ -204,7 +643,7 @@ def repeat(x, repeats, /, *, axis=None): "`repeats` sequence must have the same length as the " "repeated axis" ) - repeats = dpt.asarray( + repeats = dpt_ext.asarray( repeats, dtype=dpt.int64, usm_type=usm_type, sycl_queue=exec_q ) if not dpt.all(repeats >= 0): @@ -223,7 +662,7 @@ def repeat(x, repeats, /, *, axis=None): res_shape = x_shape[:axis] + (res_axis_size,) + x_shape[axis + 1 :] else: res_shape = (res_axis_size,) - res = dpt.empty( + res = dpt_ext.empty( res_shape, dtype=x.dtype, usm_type=usm_type, sycl_queue=exec_q ) if res_axis_size > 0: @@ -238,7 +677,7 @@ def repeat(x, repeats, /, *, axis=None): _manager.add_event_pair(ht_rep_ev, rep_ev) else: if repeats.dtype != dpt.int64: - rep_buf = dpt.empty( + rep_buf = dpt_ext.empty( repeats.shape, dtype=dpt.int64, usm_type=usm_type, @@ -248,7 +687,7 @@ def repeat(x, repeats, /, *, axis=None): src=repeats, dst=rep_buf, sycl_queue=exec_q, depends=dep_evs ) _manager.add_event_pair(ht_copy_ev, copy_ev) - cumsum = dpt.empty( + cumsum = dpt_ext.empty( (axis_size,), dtype=dpt.int64, usm_type=usm_type, @@ -264,7 +703,7 @@ def repeat(x, repeats, /, *, axis=None): ) else: res_shape = (res_axis_size,) - res = dpt.empty( + res = dpt_ext.empty( res_shape, dtype=x.dtype, usm_type=usm_type, @@ -281,7 +720,7 @@ def repeat(x, repeats, /, *, axis=None): ) _manager.add_event_pair(ht_rep_ev, rep_ev) else: - cumsum = dpt.empty( + cumsum = dpt_ext.empty( (axis_size,), dtype=dpt.int64, usm_type=usm_type, @@ -296,7 +735,7 @@ def repeat(x, repeats, /, *, axis=None): ) else: res_shape = (res_axis_size,) - res = dpt.empty( + res = dpt_ext.empty( res_shape, dtype=x.dtype, usm_type=usm_type, @@ -353,7 +792,7 @@ def roll(x, /, shift, *, axis=None): _manager = dputils.SequentialOrderManager[exec_q] if axis is None: shift = operator.index(shift) - res = dpt.empty( + res = dpt_ext.empty( x.shape, dtype=x.dtype, usm_type=x.usm_type, sycl_queue=exec_q ) sz = operator.index(x.size) @@ -380,7 +819,7 @@ def roll(x, /, shift, *, axis=None): n_i = operator.index(shape[ax]) shifted = shifts[ax] + operator.index(sh) shifts[ax] = (shifted % n_i) if n_i > 0 else 0 - res = dpt.empty( + res = dpt_ext.empty( x.shape, dtype=x.dtype, usm_type=x.usm_type, sycl_queue=exec_q ) dep_evs = _manager.submitted_events @@ -389,3 +828,273 @@ def roll(x, /, shift, *, axis=None): ) _manager.add_event_pair(ht_e, roll_ev) return res + + +def squeeze(X, /, axis=None): + """squeeze(x, axis) + + Removes singleton dimensions (axes) from array `x`. + + Args: + x (usm_ndarray): input array + axis (Union[int, Tuple[int,...]]): axis (or axes) to squeeze. + + Returns: + usm_ndarray: + Output array is a view, if possible, + and a copy otherwise, but with all or a subset of the + dimensions of length 1 removed. Output has the same data + type as the input, is allocated on the same device as the + input and has the same USM allocation type as the input + array `x`. + + Raises: + ValueError: if the specified axis has a size greater than one. + """ + if not isinstance(X, dpt.usm_ndarray): + raise TypeError(f"Expected usm_ndarray type, got {type(X)}.") + X_shape = X.shape + if axis is not None: + axis = normalize_axis_tuple(axis, X.ndim if X.ndim != 0 else X.ndim + 1) + new_shape = [] + for i, x in enumerate(X_shape): + if i not in axis: + new_shape.append(x) + else: + if x != 1: + raise ValueError( + "Cannot select an axis to squeeze out " + "which has size not equal to one." + ) + new_shape = tuple(new_shape) + else: + new_shape = tuple(axis for axis in X_shape if axis != 1) + if new_shape == X.shape: + return X + else: + return dpt_ext.reshape(X, new_shape) + + +def stack(arrays, /, *, axis=0): + """ + stack(arrays, axis) + + Joins a sequence of arrays along a new axis. + + Args: + arrays (Union[List[usm_ndarray], Tuple[usm_ndarray,...]]): + input arrays to join. Each array must have the same shape. + axis (int): axis along which the arrays will be joined. Providing + an `axis` specified the index of the new axis in the dimensions + of the output array. A valid axis must be on the interval + `[-N, N)`, where `N` is the rank (number of dimensions) of `x`. + Default: `0`. + + Returns: + usm_ndarray: + An output array having rank `N+1`, where `N` is + the rank (number of dimensions) of `x`. If the input arrays have + different data types, array API Type Promotion Rules apply. + + Raises: + ValueError: if not all input arrays have the same shape + IndexError: if provided an `axis` outside of the required interval. + """ + res_dtype, res_usm_type, exec_q = _arrays_validation(arrays) + + n = len(arrays) + X0 = arrays[0] + res_ndim = X0.ndim + 1 + axis = normalize_axis_index(axis, res_ndim) + X0_shape = X0.shape + + for i in range(1, n): + if X0_shape != arrays[i].shape: + raise ValueError("All input arrays must have the same shape") + + res_shape = tuple( + X0_shape[i - 1 * (i >= axis)] if i != axis else n + for i in range(res_ndim) + ) + + res = dpt_ext.empty( + res_shape, dtype=res_dtype, usm_type=res_usm_type, sycl_queue=exec_q + ) + + _manager = dputils.SequentialOrderManager[exec_q] + dep_evs = _manager.submitted_events + for i in range(n): + c_shapes_copy = tuple( + i if j == axis else np.s_[:] for j in range(res_ndim) + ) + _dst = res[c_shapes_copy] + hev, cpy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=arrays[i], dst=_dst, sycl_queue=exec_q, depends=dep_evs + ) + _manager.add_event_pair(hev, cpy_ev) + + return res + + +def swapaxes(X, axis1, axis2): + """swapaxes(x, axis1, axis2) + + Interchanges two axes of an array. + + Args: + x (usm_ndarray): input array + + axis1 (int): First axis. + If `x` has rank (i.e., number of dimensions) `N`, + a valid `axis` must be in the half-open interval `[-N, N)`. + + axis2 (int): Second axis. + If `x` has rank (i.e., number of dimensions) `N`, + a valid `axis` must be in the half-open interval `[-N, N)`. + + Returns: + usm_ndarray: + Array with swapped axes. + The returned array must has the same data type as `x`, + is created on the same device as `x` and has the same USM + allocation type as `x`. + + Raises: + AxisError: if `axis` value is invalid. + """ + if not isinstance(X, dpt.usm_ndarray): + raise TypeError(f"Expected usm_ndarray type, got {type(X)}.") + + axis1 = normalize_axis_index(axis1, X.ndim, "axis1") + axis2 = normalize_axis_index(axis2, X.ndim, "axis2") + + ind = list(range(0, X.ndim)) + ind[axis1] = axis2 + ind[axis2] = axis1 + return dpt_ext.permute_dims(X, tuple(ind)) + + +def unstack(X, /, *, axis=0): + """unstack(x, axis=0) + + Splits an array in a sequence of arrays along the given axis. + + Args: + x (usm_ndarray): input array + + axis (int, optional): axis along which `x` is unstacked. + If `x` has rank (i.e, number of dimensions) `N`, + a valid `axis` must reside in the half-open interval `[-N, N)`. + Default: `0`. + + Returns: + Tuple[usm_ndarray,...]: + Output sequence of arrays which are views into the input array. + + Raises: + AxisError: if the `axis` value is invalid. + """ + if not isinstance(X, dpt.usm_ndarray): + raise TypeError(f"Expected usm_ndarray type, got {type(X)}.") + + axis = normalize_axis_index(axis, X.ndim) + Y = dpt_ext.moveaxis(X, axis, 0) + + return tuple(Y[i] for i in range(Y.shape[0])) + + +def tile(x, repetitions, /): + """tile(x, repetitions) + + Repeat an input array `x` along each axis a number of times given by + `repetitions`. + + For `N` = len(`repetitions`) and `M` = len(`x.shape`): + + * If `M < N`, `x` will have `N - M` new axes prepended to its shape + * If `M > N`, `repetitions` will have `M - N` ones prepended to it + + Args: + x (usm_ndarray): input array + + repetitions (Union[int, Tuple[int, ...]]): + The number of repetitions along each dimension of `x`. + + Returns: + usm_ndarray: + tiled output array. + + The returned array will have rank `max(M, N)`. If `S` is the + shape of `x` after prepending dimensions and `R` is + `repetitions` after prepending ones, then the shape of the + result will be `S[i] * R[i]` for each dimension `i`. + + The returned array will have the same data type as `x`. + The returned array will be located on the same device as `x` and + have the same USM allocation type as `x`. + """ + if not isinstance(x, dpt.usm_ndarray): + raise TypeError(f"Expected usm_ndarray type, got {type(x)}.") + + if not isinstance(repetitions, tuple): + if isinstance(repetitions, int): + repetitions = (repetitions,) + else: + raise TypeError( + f"Expected tuple or integer type, got {type(repetitions)}." + ) + + rep_dims = len(repetitions) + x_dims = x.ndim + if rep_dims < x_dims: + repetitions = (x_dims - rep_dims) * (1,) + repetitions + elif x_dims < rep_dims: + x = dpt_ext.reshape(x, (rep_dims - x_dims) * (1,) + x.shape) + res_shape = tuple(map(lambda sh, rep: sh * rep, x.shape, repetitions)) + # case of empty input + if x.size == 0: + return dpt_ext.empty( + res_shape, + dtype=x.dtype, + usm_type=x.usm_type, + sycl_queue=x.sycl_queue, + ) + in_sh = x.shape + if res_shape == in_sh: + return dpt_ext.copy(x) + expanded_sh = [] + broadcast_sh = [] + out_sz = 1 + for i in range(len(res_shape)): + out_sz *= res_shape[i] + reps, sh = repetitions[i], in_sh[i] + if reps == 1: + # dimension will be unchanged + broadcast_sh.append(sh) + expanded_sh.append(sh) + elif sh == 1: + # dimension will be broadcast + broadcast_sh.append(reps) + expanded_sh.append(sh) + else: + broadcast_sh.extend([reps, sh]) + expanded_sh.extend([1, sh]) + exec_q = x.sycl_queue + xdt = x.dtype + xut = x.usm_type + res = dpt_ext.empty((out_sz,), dtype=xdt, usm_type=xut, sycl_queue=exec_q) + # no need to copy data for empty output + if out_sz > 0: + x = dpt_ext.broadcast_to( + # this reshape should never copy + dpt_ext.reshape(x, expanded_sh), + broadcast_sh, + ) + # copy broadcast input into flat array + _manager = dputils.SequentialOrderManager[exec_q] + dep_evs = _manager.submitted_events + hev, cp_ev = ti._copy_usm_ndarray_for_reshape( + src=x, dst=res, sycl_queue=exec_q, depends=dep_evs + ) + _manager.add_event_pair(hev, cp_ev) + return dpt_ext.reshape(res, res_shape) diff --git a/dpctl_ext/tensor/_reshape.py b/dpctl_ext/tensor/_reshape.py index 6afa1dc245c..b7b6b068bfd 100644 --- a/dpctl_ext/tensor/_reshape.py +++ b/dpctl_ext/tensor/_reshape.py @@ -37,6 +37,10 @@ _unravel_index, ) +# TODO: revert to `import dpctl.tensor...` +# when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt_ext + __doc__ = "Implementation module for :func:`dpctl.tensor.reshape`." @@ -184,7 +188,7 @@ def reshape(X, /, shape, *, order="C", copy=None): src=X, dst=flat_res, sycl_queue=copy_q, depends=dep_evs ) else: - X_t = dpt.permute_dims(X, range(X.ndim - 1, -1, -1)) + X_t = dpt_ext.permute_dims(X, range(X.ndim - 1, -1, -1)) hev, r_e = _copy_usm_ndarray_for_reshape( src=X_t, dst=flat_res, sycl_queue=copy_q, depends=dep_evs ) diff --git a/dpctl_ext/tensor/_scalar_utils.py b/dpctl_ext/tensor/_scalar_utils.py index 86787baea8c..3ab92b42ad0 100644 --- a/dpctl_ext/tensor/_scalar_utils.py +++ b/dpctl_ext/tensor/_scalar_utils.py @@ -33,6 +33,10 @@ import numpy as np from dpctl.tensor._usmarray import _is_object_with_buffer_protocol as _is_buffer +# TODO: revert to `import dpctl.tensor...` +# when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt_ext + from ._type_utils import ( WeakBooleanType, WeakComplexType, @@ -59,7 +63,7 @@ def _get_dtype(o, dev): if isinstance(o, dpt.usm_ndarray): return o.dtype if hasattr(o, "__sycl_usm_array_interface__"): - return dpt.asarray(o).dtype + return dpt_ext.asarray(o).dtype if _is_buffer(o): host_dt = np.array(o).dtype dev_dt = _to_device_supported_dtype(host_dt, dev) diff --git a/dpctl_ext/tensor/_search_functions.py b/dpctl_ext/tensor/_search_functions.py index 053c68e1857..26100b0479f 100644 --- a/dpctl_ext/tensor/_search_functions.py +++ b/dpctl_ext/tensor/_search_functions.py @@ -291,7 +291,7 @@ def where(condition, x1, x2, /, *, order="K", out=None): if ti._array_overlap(condition, out) and not ti._same_logical_tensors( condition, out ): - out = dpt.empty_like(out) + out = dpt_ext.empty_like(out) if isinstance(x1, dpt.usm_ndarray): if ( @@ -299,7 +299,7 @@ def where(condition, x1, x2, /, *, order="K", out=None): and not ti._same_logical_tensors(x1, out) and x1_dtype == out_dtype ): - out = dpt.empty_like(out) + out = dpt_ext.empty_like(out) if isinstance(x2, dpt.usm_ndarray): if ( @@ -307,7 +307,7 @@ def where(condition, x1, x2, /, *, order="K", out=None): and not ti._same_logical_tensors(x2, out) and x2_dtype == out_dtype ): - out = dpt.empty_like(out) + out = dpt_ext.empty_like(out) if order == "A": order = ( @@ -323,9 +323,9 @@ def where(condition, x1, x2, /, *, order="K", out=None): else "C" ) if not isinstance(x1, dpt.usm_ndarray): - x1 = dpt.asarray(x1, dtype=x1_dtype, sycl_queue=exec_q) + x1 = dpt_ext.asarray(x1, dtype=x1_dtype, sycl_queue=exec_q) if not isinstance(x2, dpt.usm_ndarray): - x2 = dpt.asarray(x2, dtype=x2_dtype, sycl_queue=exec_q) + x2 = dpt_ext.asarray(x2, dtype=x2_dtype, sycl_queue=exec_q) if condition.size == 0: if out is not None: @@ -342,7 +342,7 @@ def where(condition, x1, x2, /, *, order="K", out=None): exec_q, ) else: - return dpt.empty( + return dpt_ext.empty( res_shape, dtype=out_dtype, order=order, @@ -356,7 +356,7 @@ def where(condition, x1, x2, /, *, order="K", out=None): if order == "K": _x1 = _empty_like_orderK(x1, out_dtype) else: - _x1 = dpt.empty_like(x1, dtype=out_dtype, order=order) + _x1 = dpt_ext.empty_like(x1, dtype=out_dtype, order=order) ht_copy1_ev, copy1_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=x1, dst=_x1, sycl_queue=exec_q, depends=dep_evs ) @@ -367,7 +367,7 @@ def where(condition, x1, x2, /, *, order="K", out=None): if order == "K": _x2 = _empty_like_orderK(x2, out_dtype) else: - _x2 = dpt.empty_like(x2, dtype=out_dtype, order=order) + _x2 = dpt_ext.empty_like(x2, dtype=out_dtype, order=order) ht_copy2_ev, copy2_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=x2, dst=_x2, sycl_queue=exec_q, depends=dep_evs ) @@ -380,7 +380,7 @@ def where(condition, x1, x2, /, *, order="K", out=None): condition, x1, x2, out_dtype, res_shape, out_usm_type, exec_q ) else: - out = dpt.empty( + out = dpt_ext.empty( res_shape, dtype=out_dtype, order=order, @@ -389,11 +389,11 @@ def where(condition, x1, x2, /, *, order="K", out=None): ) if condition_shape != res_shape: - condition = dpt.broadcast_to(condition, res_shape) + condition = dpt_ext.broadcast_to(condition, res_shape) if x1_shape != res_shape: - x1 = dpt.broadcast_to(x1, res_shape) + x1 = dpt_ext.broadcast_to(x1, res_shape) if x2_shape != res_shape: - x2 = dpt.broadcast_to(x2, res_shape) + x2 = dpt_ext.broadcast_to(x2, res_shape) dep_evs = _manager.submitted_events hev, where_ev = ti._where( diff --git a/dpctl_ext/tensor/libtensor/include/kernels/constructors.hpp b/dpctl_ext/tensor/libtensor/include/kernels/constructors.hpp index 26ae46707a6..27074cd2d24 100644 --- a/dpctl_ext/tensor/libtensor/include/kernels/constructors.hpp +++ b/dpctl_ext/tensor/libtensor/include/kernels/constructors.hpp @@ -54,6 +54,10 @@ using dpctl::tensor::ssize_t; @defgroup CtorKernels */ +template +class linear_sequence_step_kernel; +template +class linear_sequence_affine_kernel; template class full_strided_kernel; template @@ -61,6 +65,179 @@ class eye_kernel; using namespace dpctl::tensor::offset_utils; +template +class LinearSequenceStepFunctor +{ +private: + Ty *p = nullptr; + Ty start_v; + Ty step_v; + +public: + LinearSequenceStepFunctor(char *dst_p, Ty v0, Ty dv) + : p(reinterpret_cast(dst_p)), start_v(v0), step_v(dv) + { + } + + void operator()(sycl::id<1> wiid) const + { + auto i = wiid.get(0); + using dpctl::tensor::type_utils::is_complex; + if constexpr (is_complex::value) { + p[i] = Ty{start_v.real() + i * step_v.real(), + start_v.imag() + i * step_v.imag()}; + } + else { + p[i] = start_v + i * step_v; + } + } +}; + +/*! + * @brief Function to submit kernel to populate given contiguous memory + * allocation with linear sequence specified by typed starting value and + * increment. + * + * @param q Sycl queue to which the kernel is submitted + * @param nelems Length of the sequence + * @param start_v Typed starting value of the sequence + * @param step_v Typed increment of the sequence + * @param array_data Kernel accessible USM pointer to the start of array to be + * populated. + * @param depends List of events to wait for before starting computations, if + * any. + * + * @return Event to wait on to ensure that computation completes. + * @defgroup CtorKernels + */ +template +sycl::event lin_space_step_impl(sycl::queue &exec_q, + std::size_t nelems, + Ty start_v, + Ty step_v, + char *array_data, + const std::vector &depends) +{ + dpctl::tensor::type_utils::validate_type_for_device(exec_q); + sycl::event lin_space_step_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + cgh.parallel_for>( + sycl::range<1>{nelems}, + LinearSequenceStepFunctor(array_data, start_v, step_v)); + }); + + return lin_space_step_event; +} + +// Constructor to populate tensor with linear sequence defined by +// start and and data + +template +class LinearSequenceAffineFunctor +{ +private: + Ty *p = nullptr; + Ty start_v; + Ty end_v; + std::size_t n; + +public: + LinearSequenceAffineFunctor(char *dst_p, Ty v0, Ty v1, std::size_t den) + : p(reinterpret_cast(dst_p)), start_v(v0), end_v(v1), + n((den == 0) ? 1 : den) + { + } + + void operator()(sycl::id<1> wiid) const + { + auto i = wiid.get(0); + wTy wc = wTy(i) / n; + wTy w = wTy(n - i) / n; + using dpctl::tensor::type_utils::is_complex; + if constexpr (is_complex::value) { + using reT = typename Ty::value_type; + auto _w = static_cast(w); + auto _wc = static_cast(wc); + auto re_comb = sycl::fma(start_v.real(), _w, reT(0)); + re_comb = + sycl::fma(end_v.real(), _wc, + re_comb); // start_v.real() * _w + end_v.real() * _wc; + auto im_comb = + sycl::fma(start_v.imag(), _w, + reT(0)); // start_v.imag() * _w + end_v.imag() * _wc; + im_comb = sycl::fma(end_v.imag(), _wc, im_comb); + Ty affine_comb = Ty{re_comb, im_comb}; + p[i] = affine_comb; + } + else if constexpr (std::is_floating_point::value) { + Ty _w = static_cast(w); + Ty _wc = static_cast(wc); + auto affine_comb = + sycl::fma(start_v, _w, Ty(0)); // start_v * w + end_v * wc; + affine_comb = sycl::fma(end_v, _wc, affine_comb); + p[i] = affine_comb; + } + else { + using dpctl::tensor::type_utils::convert_impl; + auto affine_comb = start_v * w + end_v * wc; + p[i] = convert_impl(affine_comb); + } + } +}; + +/*! + * @brief Function to submit kernel to populate given contiguous memory + * allocation with linear sequence specified by typed starting and end values. + * + * @param exec_q Sycl queue to which kernel is submitted for execution. + * @param nelems Length of the sequence. + * @param start_v Stating value of the sequence. + * @param end_v End-value of the sequence. + * @param include_endpoint Whether the end-value is included in the sequence. + * @param array_data Kernel accessible USM pointer to the start of array to be + * populated. + * @param depends List of events to wait for before starting computations, if + * any. + * + * @return Event to wait on to ensure that computation completes. + * @defgroup CtorKernels + */ +template +sycl::event lin_space_affine_impl(sycl::queue &exec_q, + std::size_t nelems, + Ty start_v, + Ty end_v, + bool include_endpoint, + char *array_data, + const std::vector &depends) +{ + dpctl::tensor::type_utils::validate_type_for_device(exec_q); + + const bool device_supports_doubles = + exec_q.get_device().has(sycl::aspect::fp64); + const std::size_t den = (include_endpoint) ? nelems - 1 : nelems; + + sycl::event lin_space_affine_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + if (device_supports_doubles) { + using KernelName = linear_sequence_affine_kernel; + using Impl = LinearSequenceAffineFunctor; + + cgh.parallel_for(sycl::range<1>{nelems}, + Impl(array_data, start_v, end_v, den)); + } + else { + using KernelName = linear_sequence_affine_kernel; + using Impl = LinearSequenceAffineFunctor; + + cgh.parallel_for(sycl::range<1>{nelems}, + Impl(array_data, start_v, end_v, den)); + } + }); + + return lin_space_affine_event; +} + /* ================ Full ================== */ /*! diff --git a/dpctl_ext/tensor/libtensor/source/linear_sequences.cpp b/dpctl_ext/tensor/libtensor/source/linear_sequences.cpp new file mode 100644 index 00000000000..5204f24b372 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/linear_sequences.cpp @@ -0,0 +1,308 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions +//===---------------------------------------------------------------------===// + +#include +#include +#include +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include + +#include "kernels/constructors.hpp" +#include "utils/output_validation.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +#include "linear_sequences.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +// Constructor to populate tensor with linear sequence defined by +// start and step data + +typedef sycl::event (*lin_space_step_fn_ptr_t)( + sycl::queue &, + std::size_t, // num_elements + const py::object &start, + const py::object &step, + char *, // dst_data_ptr + const std::vector &); + +/*! + * @brief Function to submit kernel to populate given contiguous memory + * allocation with linear sequence specified by starting value and increment + * given as Python objects. + * + * @param q Sycl queue to which the kernel is submitted + * @param nelems Length of the sequence + * @param start Starting value of the sequence as Python object. Must be + * convertible to array element data type `Ty`. + * @param step Increment of the sequence as Python object. Must be convertible + * to array element data type `Ty`. + * @param array_data Kernel accessible USM pointer to the start of array to be + * populated. + * @param depends List of events to wait for before starting computations, if + * any. + * + * @return Event to wait on to ensure that computation completes. + * @defgroup CtorKernels + */ +template +sycl::event lin_space_step_impl(sycl::queue &exec_q, + std::size_t nelems, + const py::object &start, + const py::object &step, + char *array_data, + const std::vector &depends) +{ + Ty start_v = py::cast(start); + Ty step_v = py::cast(step); + + using dpctl::tensor::kernels::constructors::lin_space_step_impl; + + auto lin_space_step_event = lin_space_step_impl( + exec_q, nelems, start_v, step_v, array_data, depends); + + return lin_space_step_event; +} + +typedef sycl::event (*lin_space_affine_fn_ptr_t)( + sycl::queue &, + std::size_t, // num_elements + const py::object &start, + const py::object &end, + bool include_endpoint, + char *, // dst_data_ptr + const std::vector &); + +/*! + * @brief Function to submit kernel to populate given contiguous memory + * allocation with linear sequence specified by starting and end values given + * as Python objects. + * + * @param exec_q Sycl queue to which kernel is submitted for execution. + * @param nelems Length of the sequence + * @param start Stating value of the sequence as Python object. Must be + * convertible to array data element type `Ty`. + * @param end End-value of the sequence as Python object. Must be convertible + * to array data element type `Ty`. + * @param include_endpoint Whether the end-value is included in the sequence + * @param array_data Kernel accessible USM pointer to the start of array to be + * populated. + * @param depends List of events to wait for before starting computations, if + * any. + * + * @return Event to wait on to ensure that computation completes. + * @defgroup CtorKernels + */ +template +sycl::event lin_space_affine_impl(sycl::queue &exec_q, + std::size_t nelems, + const py::object &start, + const py::object &end, + bool include_endpoint, + char *array_data, + const std::vector &depends) +{ + Ty start_v = py::cast(start); + Ty end_v = py::cast(end); + + using dpctl::tensor::kernels::constructors::lin_space_affine_impl; + + auto lin_space_affine_event = lin_space_affine_impl( + exec_q, nelems, start_v, end_v, include_endpoint, array_data, depends); + + return lin_space_affine_event; +} + +using dpctl::utils::keep_args_alive; + +static lin_space_step_fn_ptr_t lin_space_step_dispatch_vector[td_ns::num_types]; + +static lin_space_affine_fn_ptr_t + lin_space_affine_dispatch_vector[td_ns::num_types]; + +std::pair + usm_ndarray_linear_sequence_step(const py::object &start, + const py::object &dt, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends) +{ + // dst must be 1D and C-contiguous + // start, end should be coercible into data type of dst + + if (dst.get_ndim() != 1) { + throw py::value_error( + "usm_ndarray_linspace: Expecting 1D array to populate"); + } + + if (!dst.is_c_contiguous()) { + throw py::value_error( + "usm_ndarray_linspace: Non-contiguous arrays are not supported"); + } + + if (!dpctl::utils::queues_are_compatible(exec_q, {dst})) { + throw py::value_error( + "Execution queue is not compatible with the allocation queue"); + } + + dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst); + + auto array_types = td_ns::usm_ndarray_types(); + int dst_typenum = dst.get_typenum(); + int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum); + + py::ssize_t len = dst.get_shape(0); + if (len == 0) { + // nothing to do + return std::make_pair(sycl::event{}, sycl::event{}); + } + + char *dst_data = dst.get_data(); + sycl::event linspace_step_event; + + auto fn = lin_space_step_dispatch_vector[dst_typeid]; + + linspace_step_event = + fn(exec_q, static_cast(len), start, dt, dst_data, depends); + + return std::make_pair(keep_args_alive(exec_q, {dst}, {linspace_step_event}), + linspace_step_event); +} + +std::pair + usm_ndarray_linear_sequence_affine(const py::object &start, + const py::object &end, + const dpctl::tensor::usm_ndarray &dst, + bool include_endpoint, + sycl::queue &exec_q, + const std::vector &depends) +{ + // dst must be 1D and C-contiguous + // start, end should be coercible into data type of dst + + if (dst.get_ndim() != 1) { + throw py::value_error( + "usm_ndarray_linspace: Expecting 1D array to populate"); + } + + if (!dst.is_c_contiguous()) { + throw py::value_error( + "usm_ndarray_linspace: Non-contiguous arrays are not supported"); + } + + if (!dpctl::utils::queues_are_compatible(exec_q, {dst})) { + throw py::value_error( + "Execution queue context is not the same as allocation context"); + } + + dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst); + + auto array_types = td_ns::usm_ndarray_types(); + int dst_typenum = dst.get_typenum(); + int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum); + + py::ssize_t len = dst.get_shape(0); + if (len == 0) { + // nothing to do + return std::make_pair(sycl::event{}, sycl::event{}); + } + + char *dst_data = dst.get_data(); + sycl::event linspace_affine_event; + + auto fn = lin_space_affine_dispatch_vector[dst_typeid]; + + linspace_affine_event = fn(exec_q, static_cast(len), start, + end, include_endpoint, dst_data, depends); + + return std::make_pair( + keep_args_alive(exec_q, {dst}, {linspace_affine_event}), + linspace_affine_event); +} + +/*! + * @brief Factor to get function pointer of type `fnT` for array with elements + * of type `Ty`. + * @defgroup CtorKernels + */ +template +struct LinSpaceStepFactory +{ + fnT get() + { + fnT f = lin_space_step_impl; + return f; + } +}; + +/*! + * @brief Factory to get function pointer of type `fnT` for array data type + * `Ty`. + */ +template +struct LinSpaceAffineFactory +{ + fnT get() + { + fnT f = lin_space_affine_impl; + return f; + } +}; + +void init_linear_sequences_dispatch_vectors(void) +{ + using namespace td_ns; + + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(lin_space_step_dispatch_vector); + + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(lin_space_affine_dispatch_vector); +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/linear_sequences.hpp b/dpctl_ext/tensor/libtensor/source/linear_sequences.hpp new file mode 100644 index 00000000000..45cf4515346 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/linear_sequences.hpp @@ -0,0 +1,66 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include + +#include + +#include "dpnp4pybind11.hpp" +#include + +namespace py = pybind11; + +namespace dpctl::tensor::py_internal +{ + +extern std::pair usm_ndarray_linear_sequence_step( + const py::object &start, + const py::object &dt, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}); + +extern std::pair usm_ndarray_linear_sequence_affine( + const py::object &start, + const py::object &end, + const dpctl::tensor::usm_ndarray &dst, + bool include_endpoint, + sycl::queue &exec_q, + const std::vector &depends = {}); + +extern void init_linear_sequences_dispatch_vectors(void); + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp b/dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp index e6bc3b5dfb6..7bed4df01d2 100644 --- a/dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp +++ b/dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp @@ -56,7 +56,7 @@ #include "full_ctor.hpp" #include "integer_advanced_indexing.hpp" #include "kernels/dpctl_tensor_types.hpp" -// #include "linear_sequences.hpp" +#include "linear_sequences.hpp" #include "repeat.hpp" #include "simplify_iteration_space.hpp" #include "triul_ctor.hpp" @@ -97,8 +97,8 @@ using dpctl::tensor::py_internal::copy_numpy_ndarray_into_usm_ndarray; /* ============= linear-sequence ==================== */ -// using dpctl::tensor::py_internal::usm_ndarray_linear_sequence_affine; -// using dpctl::tensor::py_internal::usm_ndarray_linear_sequence_step; +using dpctl::tensor::py_internal::usm_ndarray_linear_sequence_affine; +using dpctl::tensor::py_internal::usm_ndarray_linear_sequence_step; /* ================ Full ================== */ @@ -157,7 +157,7 @@ void init_dispatch_vectors(void) init_copy_as_contig_dispatch_vectors(); init_copy_for_reshape_dispatch_vectors(); init_copy_for_roll_dispatch_vectors(); - // init_linear_sequences_dispatch_vectors(); + init_linear_sequences_dispatch_vectors(); init_full_ctor_dispatch_vectors(); init_zeros_ctor_dispatch_vectors(); init_eye_ctor_dispatch_vectors(); @@ -300,20 +300,22 @@ PYBIND11_MODULE(_tensor_impl, m) py::arg("src"), py::arg("dst"), py::arg("shifts"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - // m.def("_linspace_step", &usm_ndarray_linear_sequence_step, - // "Fills input 1D contiguous usm_ndarray `dst` with linear - // sequence " "specified by " "starting point `start` and step - // `dt`. " "Returns a tuple of events: (ht_event, comp_event)", - // py::arg("start"), py::arg("dt"), py::arg("dst"), - // py::arg("sycl_queue"), py::arg("depends") = py::list()); - - // m.def("_linspace_affine", &usm_ndarray_linear_sequence_affine, - // "Fills input 1D contiguous usm_ndarray `dst` with linear - // sequence " "specified by " "starting point `start` and end - // point `end`. " "Returns a tuple of events: (ht_event, - // comp_event)", py::arg("start"), py::arg("end"), py::arg("dst"), - // py::arg("include_endpoint"), py::arg("sycl_queue"), - // py::arg("depends") = py::list()); + m.def("_linspace_step", &usm_ndarray_linear_sequence_step, + "Fills input 1D contiguous usm_ndarray `dst` with linear sequence " + "specified by " + "starting point `start` and step `dt`. " + "Returns a tuple of events: (ht_event, comp_event)", + py::arg("start"), py::arg("dt"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + m.def("_linspace_affine", &usm_ndarray_linear_sequence_affine, + "Fills input 1D contiguous usm_ndarray `dst` with linear sequence " + "specified by " + "starting point `start` and end point `end`. " + "Returns a tuple of events: (ht_event, comp_event)", + py::arg("start"), py::arg("end"), py::arg("dst"), + py::arg("include_endpoint"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); m.def("_copy_numpy_ndarray_into_usm_ndarray", ©_numpy_ndarray_into_usm_ndarray, diff --git a/dpnp/dpnp_algo/dpnp_arraycreation.py b/dpnp/dpnp_algo/dpnp_arraycreation.py index f3dd1815356..4e2ee8531a1 100644 --- a/dpnp/dpnp_algo/dpnp_arraycreation.py +++ b/dpnp/dpnp_algo/dpnp_arraycreation.py @@ -53,7 +53,7 @@ def _as_usm_ndarray(a, usm_type, sycl_queue): if isinstance(a, dpnp_array): a = a.get_array() - return dpt.asarray(a, usm_type=usm_type, sycl_queue=sycl_queue) + return dpt_ext.asarray(a, usm_type=usm_type, sycl_queue=sycl_queue) def _check_has_zero_val(a): @@ -196,7 +196,7 @@ def dpnp_linspace( if dpnp.isscalar(start) and dpnp.isscalar(stop): # Call linspace() function for scalars. - usm_res = dpt.linspace( + usm_res = dpt_ext.linspace( start, stop, num, @@ -213,19 +213,19 @@ def dpnp_linspace( else: step = dpnp.nan else: - usm_start = dpt.asarray( + usm_start = dpt_ext.asarray( start, dtype=dt, usm_type=_usm_type, sycl_queue=sycl_queue_normalized, ) - usm_stop = dpt.asarray( + usm_stop = dpt_ext.asarray( stop, dtype=dt, usm_type=_usm_type, sycl_queue=sycl_queue_normalized ) delta = usm_stop - usm_start - usm_res = dpt.arange( + usm_res = dpt_ext.arange( 0, stop=num, step=1, @@ -256,7 +256,7 @@ def dpnp_linspace( usm_res[-1, ...] = usm_stop if axis != 0: - usm_res = dpt.moveaxis(usm_res, 0, axis) + usm_res = dpt_ext.moveaxis(usm_res, 0, axis) if dpnp.issubdtype(dtype, dpnp.integer): dpt.floor(usm_res, out=usm_res) @@ -266,7 +266,7 @@ def dpnp_linspace( if retstep is True: if dpnp.isscalar(step): - step = dpt.asarray( + step = dpt_ext.asarray( step, usm_type=res.usm_type, sycl_queue=res.sycl_queue ) return res, dpnp_array._create_from_usm_ndarray(step) diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py index 55d74e8c180..b3e0c74c228 100644 --- a/dpnp/dpnp_algo/dpnp_elementwise_common.py +++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py @@ -467,7 +467,7 @@ def __call__( ) # Allocate a temporary buffer with the required dtype - out[i] = dpt.empty_like(res, dtype=res_dt) + out[i] = dpt_ext.empty_like(res, dtype=res_dt) elif ( buf_dt is None and dti._array_overlap(x, res) @@ -476,7 +476,7 @@ def __call__( # Allocate a temporary buffer to avoid memory overlapping. # Note if `buf_dt` is not None, a temporary copy of `x` will be # created, so the array overlap check isn't needed. - out[i] = dpt.empty_like(res) + out[i] = dpt_ext.empty_like(res) _manager = dpu.SequentialOrderManager[exec_q] dep_evs = _manager.submitted_events @@ -486,7 +486,7 @@ def __call__( if order == "K": buf = dtc._empty_like_orderK(x, buf_dt) else: - buf = dpt.empty_like(x, dtype=buf_dt, order=order) + buf = dpt_ext.empty_like(x, dtype=buf_dt, order=order) ht_copy_ev, copy_ev = dti._copy_usm_ndarray_into_usm_ndarray( src=x, dst=buf, sycl_queue=exec_q, depends=dep_evs @@ -503,7 +503,7 @@ def __call__( if order == "K": out[i] = dtc._empty_like_orderK(x, res_dt) else: - out[i] = dpt.empty_like(x, dtype=res_dt, order=order) + out[i] = dpt_ext.empty_like(x, dtype=res_dt, order=order) # Call the unary function with input and output arrays ht_unary_ev, unary_ev = self.get_implementation_function()( @@ -713,7 +713,7 @@ def __call__( if dtype is not None: if dpnp.isscalar(x1): - x1_usm = dpt.asarray( + x1_usm = dpt_ext.asarray( x1, dtype=dtype, sycl_queue=x2.sycl_queue, @@ -722,7 +722,7 @@ def __call__( x2_usm = dpt_ext.astype(x2_usm, dtype, copy=False) elif dpnp.isscalar(x2): x1_usm = dpt_ext.astype(x1_usm, dtype, copy=False) - x2_usm = dpt.asarray( + x2_usm = dpt_ext.asarray( x2, dtype=dtype, sycl_queue=x1.sycl_queue, @@ -1078,7 +1078,7 @@ def __call__( ) # Allocate a temporary buffer with the required dtype - out[i] = dpt.empty_like(res, dtype=res_dt) + out[i] = dpt_ext.empty_like(res, dtype=res_dt) else: # If `dt` is not None, a temporary copy of `x` will be created, # so the array overlap check isn't needed. @@ -1094,7 +1094,7 @@ def __call__( for x in x_to_check ): # allocate a temporary buffer to avoid memory overlapping - out[i] = dpt.empty_like(res) + out[i] = dpt_ext.empty_like(res) x1 = dpnp.as_usm_ndarray(x1, dtype=x1_dt, sycl_queue=exec_q) x2 = dpnp.as_usm_ndarray(x2, dtype=x2_dt, sycl_queue=exec_q) @@ -1127,7 +1127,7 @@ def __call__( if order == "K": buf = dtc._empty_like_orderK(x, buf_dt) else: - buf = dpt.empty_like(x, dtype=buf_dt, order=order) + buf = dpt_ext.empty_like(x, dtype=buf_dt, order=order) ht_copy_ev, copy_ev = dti._copy_usm_ndarray_into_usm_ndarray( src=x, dst=buf, sycl_queue=exec_q, depends=dep_evs @@ -1146,7 +1146,7 @@ def __call__( x1, x2, res_dt, res_shape, res_usm_type, exec_q ) else: - out[i] = dpt.empty( + out[i] = dpt_ext.empty( res_shape, dtype=res_dt, order=order, @@ -1156,9 +1156,9 @@ def __call__( # Broadcast shapes of input arrays if x1.shape != res_shape: - x1 = dpt.broadcast_to(x1, res_shape) + x1 = dpt_ext.broadcast_to(x1, res_shape) if x2.shape != res_shape: - x2 = dpt.broadcast_to(x2, res_shape) + x2 = dpt_ext.broadcast_to(x2, res_shape) # Call the binary function with input and output arrays ht_binary_ev, binary_ev = self.get_implementation_function()( diff --git a/dpnp/dpnp_algo/dpnp_fill.py b/dpnp/dpnp_algo/dpnp_fill.py index ddba9f634cb..7e0a70f25ff 100644 --- a/dpnp/dpnp_algo/dpnp_fill.py +++ b/dpnp/dpnp_algo/dpnp_fill.py @@ -28,13 +28,12 @@ from numbers import Number -import dpctl.tensor as dpt import dpctl.utils as dpu from dpctl.tensor._ctors import _cast_fill_val # TODO: revert to `from dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpnp from dpctl_ext.tensor._tensor_impl import ( _copy_usm_ndarray_into_usm_ndarray, @@ -56,7 +55,7 @@ def dpnp_fill(arr, val): raise dpu.ExecutionPlacementError( "Input arrays have incompatible queues." ) - a_val = dpt_ext.astype(val, arr.dtype) + a_val = dpt.astype(val, arr.dtype) a_val = dpt.broadcast_to(a_val, arr.shape) _manager = dpu.SequentialOrderManager[exec_q] dep_evs = _manager.submitted_events diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index 564627bacf2..0d05ef8d49d 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -2283,7 +2283,7 @@ def transpose(self, *axes): # self.transpose(None).shape == self.shape[::-1] axes = tuple((ndim - x - 1) for x in range(ndim)) - usm_res = dpt.permute_dims(self._array_obj, axes) + usm_res = dpt_ext.permute_dims(self._array_obj, axes) return dpnp_array._create_from_usm_ndarray(usm_res) def var( diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py index 0727b9bfd77..9fe95574659 100644 --- a/dpnp/dpnp_container.py +++ b/dpnp/dpnp_container.py @@ -35,12 +35,11 @@ """ -import dpctl.tensor as dpt import dpctl.utils as dpu # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpnp from dpnp.dpnp_array import dpnp_array @@ -143,7 +142,7 @@ def copy(x1, /, *, order="K"): if order is None: order = "K" - array_obj = dpt_ext.copy(dpnp.get_usm_ndarray(x1), order=order) + array_obj = dpt.copy(dpnp.get_usm_ndarray(x1), order=order) return dpnp_array._create_from_usm_ndarray(array_obj) @@ -196,7 +195,7 @@ def eye( order = "C" """Creates `dpnp_array` with ones on the `k`th diagonal.""" - array_obj = dpt_ext.eye( + array_obj = dpt.eye( N, M, k=k, @@ -231,7 +230,7 @@ def full( fill_value = fill_value.get_array() """Creates `dpnp_array` having a specified shape, filled with fill_value.""" - array_obj = dpt_ext.full( + array_obj = dpt.full( shape, fill_value, dtype=dtype, @@ -272,13 +271,13 @@ def ones( def tril(x1, /, *, k=0): """Creates `dpnp_array` as lower triangular part of an input array.""" - array_obj = dpt_ext.tril(dpnp.get_usm_ndarray(x1), k=k) + array_obj = dpt.tril(dpnp.get_usm_ndarray(x1), k=k) return dpnp_array._create_from_usm_ndarray(array_obj) def triu(x1, /, *, k=0): """Creates `dpnp_array` as upper triangular part of an input array.""" - array_obj = dpt_ext.triu(dpnp.get_usm_ndarray(x1), k=k) + array_obj = dpt.triu(dpnp.get_usm_ndarray(x1), k=k) return dpnp_array._create_from_usm_ndarray(array_obj) diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index 6c050a20898..9fca083a641 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -191,7 +191,7 @@ def as_usm_ndarray(a, dtype=None, device=None, usm_type=None, sycl_queue=None): if is_supported_array_type(a): return get_usm_ndarray(a) - return dpt.asarray( + return dpt_ext.asarray( a, dtype=dtype, device=device, usm_type=usm_type, sycl_queue=sycl_queue ) diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index 52fc4b7f644..d09cc17bde7 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -3131,7 +3131,7 @@ def meshgrid(*xi, copy=True, sparse=False, indexing="xy"): output[1] = dpt_ext.reshape(output[1], (-1, 1) + s0[2:]) if not sparse: - output = dpt.broadcast_arrays(*output) + output = dpt_ext.broadcast_arrays(*output) if copy: output = [dpt_ext.copy(x) for x in output] @@ -3696,7 +3696,7 @@ def tri( if usm_type is None: usm_type = "device" - m = dpt.ones( + m = dpt_ext.ones( (N, M), dtype=_dtype, device=device, @@ -3912,7 +3912,7 @@ def vander( if dpnp.is_supported_array_type(x): x = dpnp.get_usm_ndarray(x) - usm_x = dpt.asarray( + usm_x = dpt_ext.asarray( x, device=device, usm_type=usm_type, sycl_queue=sycl_queue ) @@ -3935,7 +3935,7 @@ def vander( tmp = m[:, ::-1] if not increasing else m dpnp.power( dpt_ext.reshape(usm_x, (-1, 1)), - dpt.arange( + dpt_ext.arange( N, dtype=_dtype, usm_type=x_usm_type, sycl_queue=x_sycl_queue ), out=tmp, diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py index bc190db70c4..a52196e9e4d 100644 --- a/dpnp/dpnp_iface_indexing.py +++ b/dpnp/dpnp_iface_indexing.py @@ -141,9 +141,9 @@ def _choose_run(inds, chcs, q, usm_type, out=None, mode=0): ti._array_overlap(out, chc) for chc in chcs ): # Allocate a temporary buffer to avoid memory overlapping. - out = dpt.empty_like(out) + out = dpt_ext.empty_like(out) else: - out = dpt.empty( + out = dpt_ext.empty( inds.shape, dtype=chcs[0].dtype, usm_type=usm_type, sycl_queue=q ) @@ -260,7 +260,7 @@ def choose(a, choices, out=None, mode="wrap"): choices, ) ) - arrs_broadcast = dpt.broadcast_arrays(inds, *choices) + arrs_broadcast = dpt_ext.broadcast_arrays(inds, *choices) inds = arrs_broadcast[0] choices = tuple(arrs_broadcast[1:]) @@ -301,9 +301,11 @@ def _take_index(x, inds, axis, q, usm_type, out=None, mode=0): if ti._array_overlap(x, out): # Allocate a temporary buffer to avoid memory overlapping. - out = dpt.empty_like(out) + out = dpt_ext.empty_like(out) else: - out = dpt.empty(res_sh, dtype=x.dtype, usm_type=usm_type, sycl_queue=q) + out = dpt_ext.empty( + res_sh, dtype=x.dtype, usm_type=usm_type, sycl_queue=q + ) _manager = dpu.SequentialOrderManager[q] dep_evs = _manager.submitted_events @@ -1803,7 +1805,7 @@ def put_along_axis(a, ind, values, axis, mode="wrap"): if dpnp.is_supported_array_type(values): usm_vals = dpnp.get_usm_ndarray(values) else: - usm_vals = dpt.asarray( + usm_vals = dpt_ext.asarray( values, usm_type=a.usm_type, sycl_queue=a.sycl_queue ) @@ -2151,7 +2153,7 @@ def take(a, indices, /, *, axis=None, out=None, mode="wrap"): usm_a = dpnp.get_usm_ndarray(a) if not dpnp.is_supported_array_type(indices): - usm_ind = dpt.asarray( + usm_ind = dpt_ext.asarray( indices, usm_type=a.usm_type, sycl_queue=a.sycl_queue ) else: diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py index 08fd55c58ac..c034d1c4379 100644 --- a/dpnp/dpnp_iface_manipulation.py +++ b/dpnp/dpnp_iface_manipulation.py @@ -375,7 +375,7 @@ def _get_first_nan_index(usm_a): ): if dpnp.issubdtype(usm_a.dtype, dpnp.complexfloating): # for complex all NaNs are considered equivalent - true_val = dpt.asarray( + true_val = dpt_ext.asarray( True, sycl_queue=usm_a.sycl_queue, usm_type=usm_a.usm_type ) return dpt.searchsorted(dpt.isnan(usm_a), true_val, side="left") @@ -1093,7 +1093,9 @@ def broadcast_arrays(*args, subok=False): if len(args) == 0: return [] - usm_arrays = dpt.broadcast_arrays(*[dpnp.get_usm_ndarray(a) for a in args]) + usm_arrays = dpt_ext.broadcast_arrays( + *[dpnp.get_usm_ndarray(a) for a in args] + ) return [dpnp_array._create_from_usm_ndarray(a) for a in usm_arrays] @@ -1178,7 +1180,7 @@ def broadcast_to(array, /, shape, subok=False): raise NotImplementedError(f"subok={subok} is currently not supported") usm_array = dpnp.get_usm_ndarray(array) - new_array = dpt.broadcast_to(usm_array, shape) + new_array = dpt_ext.broadcast_to(usm_array, shape) return dpnp_array._create_from_usm_ndarray(new_array) @@ -1416,7 +1418,7 @@ def concatenate( ) usm_arrays = [dpnp.get_usm_ndarray(x) for x in arrays] - usm_res = dpt.concat(usm_arrays, axis=axis) + usm_res = dpt_ext.concat(usm_arrays, axis=axis) res = dpnp_array._create_from_usm_ndarray(usm_res) if dtype is not None: @@ -1521,7 +1523,7 @@ def copyto(dst, src, casting="same_kind", where=True): f"but got {where.dtype}" ) - dst_usm, src_usm, mask_usm = dpt.broadcast_arrays( + dst_usm, src_usm, mask_usm = dpt_ext.broadcast_arrays( dpnp.get_usm_ndarray(dst), dpnp.get_usm_ndarray(src), dpnp.get_usm_ndarray(where), @@ -1849,7 +1851,7 @@ def expand_dims(a, axis): """ usm_a = dpnp.get_usm_ndarray(a) - usm_res = dpt.expand_dims(usm_a, axis=axis) + usm_res = dpt_ext.expand_dims(usm_a, axis=axis) return dpnp_array._create_from_usm_ndarray(usm_res) @@ -1920,7 +1922,7 @@ def flip(m, axis=None): """ m_usm = dpnp.get_usm_ndarray(m) - return dpnp_array._create_from_usm_ndarray(dpt.flip(m_usm, axis=axis)) + return dpnp_array._create_from_usm_ndarray(dpt_ext.flip(m_usm, axis=axis)) def fliplr(m): @@ -2408,7 +2410,7 @@ def moveaxis(a, source, destination): usm_array = dpnp.get_usm_ndarray(a) return dpnp_array._create_from_usm_ndarray( - dpt.moveaxis(usm_array, source, destination) + dpt_ext.moveaxis(usm_array, source, destination) ) @@ -3663,7 +3665,7 @@ def squeeze(a, /, axis=None): """ usm_a = dpnp.get_usm_ndarray(a) - usm_res = dpt.squeeze(usm_a, axis=axis) + usm_res = dpt_ext.squeeze(usm_a, axis=axis) return dpnp_array._create_from_usm_ndarray(usm_res) @@ -3751,7 +3753,7 @@ def stack(arrays, /, *, axis=0, out=None, dtype=None, casting="same_kind"): ) usm_arrays = [dpnp.get_usm_ndarray(x) for x in arrays] - usm_res = dpt.stack(usm_arrays, axis=axis) + usm_res = dpt_ext.stack(usm_arrays, axis=axis) res = dpnp_array._create_from_usm_ndarray(usm_res) if dtype is not None: @@ -3812,7 +3814,7 @@ def swapaxes(a, axis1, axis2): """ usm_a = dpnp.get_usm_ndarray(a) - usm_res = dpt.swapaxes(usm_a, axis1=axis1, axis2=axis2) + usm_res = dpt_ext.swapaxes(usm_a, axis1=axis1, axis2=axis2) return dpnp_array._create_from_usm_ndarray(usm_res) @@ -3892,7 +3894,7 @@ def tile(A, reps): """ usm_a = dpnp.get_usm_ndarray(A) - usm_res = dpt.tile(usm_a, reps) + usm_res = dpt_ext.tile(usm_a, reps) return dpnp_array._create_from_usm_ndarray(usm_res) @@ -4522,7 +4524,7 @@ def unstack(x, /, *, axis=0): if usm_x.ndim == 0: raise ValueError("Input array must be at least 1-d.") - res = dpt.unstack(usm_x, axis=axis) + res = dpt_ext.unstack(usm_x, axis=axis) return tuple(dpnp_array._create_from_usm_ndarray(a) for a in res) diff --git a/dpnp/dpnp_iface_searching.py b/dpnp/dpnp_iface_searching.py index a2389978d50..15f52338ec7 100644 --- a/dpnp/dpnp_iface_searching.py +++ b/dpnp/dpnp_iface_searching.py @@ -376,7 +376,7 @@ def searchsorted(a, v, side="left", sorter=None): usm_a = dpnp.get_usm_ndarray(a) if dpnp.isscalar(v): - usm_v = dpt.asarray(v, sycl_queue=a.sycl_queue, usm_type=a.usm_type) + usm_v = dpt_ext.asarray(v, sycl_queue=a.sycl_queue, usm_type=a.usm_type) else: usm_v = dpnp.get_usm_ndarray(v) diff --git a/dpnp/fft/dpnp_utils_fft.py b/dpnp/fft/dpnp_utils_fft.py index 534b9404254..e3f35a0201e 100644 --- a/dpnp/fft/dpnp_utils_fft.py +++ b/dpnp/fft/dpnp_utils_fft.py @@ -42,11 +42,6 @@ from collections.abc import Sequence import dpctl - -# pylint: disable=no-name-in-module -# TODO: remove it when ti.__linspace_step -# is migrated to dpctl_ext/tensor -import dpctl.tensor._tensor_impl as ti import dpctl.utils as dpu import numpy from dpctl.tensor._numpy_helper import ( @@ -55,10 +50,9 @@ ) from dpctl.utils import ExecutionPlacementError -# pylint: disable=no-name-in-module # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor._tensor_impl as ti_ext +import dpctl_ext.tensor._tensor_impl as ti import dpnp import dpnp.backend.extensions.fft._fft_impl as fi @@ -205,7 +199,7 @@ def _compute_result(dsc, a, out, forward, c2c, out_strides): if ( out is not None and out.strides == tuple(out_strides) - and not ti_ext._array_overlap(a_usm, dpnp.get_usm_ndarray(out)) + and not ti._array_overlap(a_usm, dpnp.get_usm_ndarray(out)) ): res_usm = out_usm result = out @@ -538,7 +532,7 @@ def _truncate_or_pad(a, shape, axes): ) _manager = dpu.SequentialOrderManager[exec_q] dep_evs = _manager.submitted_events - ht_copy_ev, copy_ev = ti_ext._copy_usm_ndarray_into_usm_ndarray( + ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=dpnp.get_usm_ndarray(a), dst=z.get_array()[tuple(index)], sycl_queue=exec_q, diff --git a/dpnp/tests/test_arraycreation.py b/dpnp/tests/test_arraycreation.py index 88e6aacb997..698e22b9f87 100644 --- a/dpnp/tests/test_arraycreation.py +++ b/dpnp/tests/test_arraycreation.py @@ -668,7 +668,7 @@ def test_tri_default_dtype(): 5, numpy.array(1), dpnp.array(2), - dpt.asarray(3), + dpt_ext.asarray(3), ], ids=[ "-3", @@ -682,7 +682,7 @@ def test_tri_default_dtype(): "5", "np.array(1)", "dpnp.array(2)", - "dpt.asarray(3)", + "dpt_ext.asarray(3)", ], ) @pytest.mark.parametrize( @@ -725,7 +725,7 @@ def test_tril(m, k, dtype): 5, numpy.array(1), dpnp.array(2), - dpt.asarray(3), + dpt_ext.asarray(3), ], ids=[ "-3", @@ -739,7 +739,7 @@ def test_tril(m, k, dtype): "5", "np.array(1)", "dpnp.array(2)", - "dpt.asarray(3)", + "dpt_ext.asarray(3)", ], ) @pytest.mark.parametrize( @@ -972,7 +972,7 @@ def test_ones_like(array, dtype, order): ], ) def test_dpctl_tensor_input(func, args): - x0 = dpt_ext.reshape(dpt.arange(9), (3, 3)) + x0 = dpt_ext.reshape(dpt_ext.arange(9), (3, 3)) new_args = [eval(val, {"x0": x0}) for val in args] X = getattr(dpt, func)(*new_args) Y = getattr(dpnp, func)(*new_args) diff --git a/dpnp/tests/test_arraymanipulation.py b/dpnp/tests/test_arraymanipulation.py index ba83ee94d8b..7d5d2efeebb 100644 --- a/dpnp/tests/test_arraymanipulation.py +++ b/dpnp/tests/test_arraymanipulation.py @@ -1,11 +1,11 @@ -import warnings - -import dpctl.tensor as dpt import numpy import pytest from dpctl.tensor._numpy_helper import AxisError from numpy.testing import assert_array_equal, assert_equal, assert_raises +# TODO: revert to `import dpctl.tensor...` +# when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt import dpnp from .helper import get_all_dtypes, get_float_complex_dtypes diff --git a/dpnp/tests/test_fft.py b/dpnp/tests/test_fft.py index 22642005774..3a19a2cf366 100644 --- a/dpnp/tests/test_fft.py +++ b/dpnp/tests/test_fft.py @@ -1,10 +1,12 @@ import dpctl -import dpctl.tensor as dpt import numpy import pytest from dpctl.utils import ExecutionPlacementError from numpy.testing import assert_raises +# TODO: revert to `import dpctl.tensor...` +# when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt import dpnp from dpnp.dpnp_utils import map_dtype_to_device diff --git a/dpnp/tests/test_indexing.py b/dpnp/tests/test_indexing.py index 79c41a2f45f..d8822d77080 100644 --- a/dpnp/tests/test_indexing.py +++ b/dpnp/tests/test_indexing.py @@ -1,7 +1,6 @@ import functools import dpctl -import dpctl.tensor as dpt import numpy import pytest from dpctl.utils import ExecutionPlacementError @@ -13,10 +12,10 @@ assert_raises_regex, ) -import dpnp - # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt +import dpnp from dpctl_ext.tensor._numpy_helper import AxisError from dpctl_ext.tensor._type_utils import _to_device_supported_dtype from dpnp.dpnp_array import dpnp_array diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py index 31d99d71ce4..b9673d21a16 100644 --- a/dpnp/tests/test_linalg.py +++ b/dpnp/tests/test_linalg.py @@ -1,7 +1,6 @@ import warnings import dpctl -import dpctl.tensor as dpt import numpy import pytest from dpctl.tensor._numpy_helper import AxisError @@ -14,6 +13,9 @@ assert_raises_regex, ) +# TODO: revert to `import dpctl.tensor...` +# when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt import dpnp from .helper import ( diff --git a/dpnp/tests/test_manipulation.py b/dpnp/tests/test_manipulation.py index 8ddba08dbb9..8095a0daa85 100644 --- a/dpnp/tests/test_manipulation.py +++ b/dpnp/tests/test_manipulation.py @@ -1,6 +1,5 @@ import itertools -import dpctl.tensor as dpt import numpy import pytest from dpctl.tensor._numpy_helper import AxisError @@ -10,6 +9,9 @@ assert_raises, ) +# TODO: revert to `import dpctl.tensor...` +# when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt import dpnp from .helper import ( diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py index 760c1a0ceb2..841494bde1e 100644 --- a/dpnp/tests/test_mathematical.py +++ b/dpnp/tests/test_mathematical.py @@ -15,6 +15,9 @@ assert_raises_regex, ) +# TODO: revert to `import dpctl.tensor...` +# when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt_ext import dpnp from dpnp.dpnp_array import dpnp_array from dpnp.dpnp_utils import map_dtype_to_device @@ -666,15 +669,15 @@ def test_to_begin_to_end(self, to_begin, to_end): "to_begin, to_end", [ (-20, 20), - (dpt.asarray([-20, -30]), dpt.asarray([20, 15])), - (dpt.asarray([[-20, -30]]), dpt.asarray([[20, 15]])), + (dpt_ext.asarray([-20, -30]), dpt_ext.asarray([20, 15])), + (dpt_ext.asarray([[-20, -30]]), dpt_ext.asarray([[20, 15]])), ([1, 2], [3, 4]), ((1, 2), (3, 4)), ], ) def test_usm_ndarray(self, to_begin, to_end): a = numpy.array([[1, 2, 0]]) - dpt_a = dpt.asarray(a) + dpt_a = dpt_ext.asarray(a) if isinstance(to_begin, dpt.usm_ndarray): np_to_begin = dpt.asnumpy(to_begin) @@ -1575,7 +1578,7 @@ def test_out(self): assert_allclose(result, expected) # output is usm_ndarray - dpt_out = dpt.empty(expected.shape, dtype=expected.dtype) + dpt_out = dpt_ext.empty(expected.shape, dtype=expected.dtype) result = dpnp.prod(ia, axis=0, out=dpt_out) assert dpt_out is result.get_array() assert_allclose(result, expected) @@ -2628,7 +2631,7 @@ def test_out_float16(self, func): def test_out_usm_ndarray(self, func, dt): a = generate_random_numpy_array(10, dt) out = numpy.empty(a.shape, dtype=dt) - ia, usm_out = dpnp.array(a), dpt.asarray(out) + ia, usm_out = dpnp.array(a), dpt_ext.asarray(out) expected = getattr(numpy, func)(a, out=out) result = getattr(dpnp, func)(ia, out=usm_out) diff --git a/dpnp/tests/test_memory.py b/dpnp/tests/test_memory.py index 1bc0da8c153..94aeda33f50 100644 --- a/dpnp/tests/test_memory.py +++ b/dpnp/tests/test_memory.py @@ -2,6 +2,9 @@ import numpy import pytest +# TODO: revert to `import dpctl.tensor...` +# when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt_ext import dpnp import dpnp.memory as dpm @@ -21,7 +24,7 @@ def test_wrong_input_type(self, x): dpm.create_data(x) def test_wrong_usm_data(self): - a = dpt.ones(10) + a = dpt_ext.ones(10) d = IntUsmData(a.shape, buffer=a) with pytest.raises(TypeError): diff --git a/dpnp/tests/test_nanfunctions.py b/dpnp/tests/test_nanfunctions.py index d92cee045a7..2cb70df5954 100644 --- a/dpnp/tests/test_nanfunctions.py +++ b/dpnp/tests/test_nanfunctions.py @@ -1,5 +1,4 @@ import dpctl -import dpctl.tensor as dpt import numpy import pytest from dpctl.utils import ExecutionPlacementError @@ -12,6 +11,9 @@ assert_raises_regex, ) +# TODO: revert to `import dpctl.tensor...` +# when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt import dpnp from .helper import ( diff --git a/dpnp/tests/test_ndarray.py b/dpnp/tests/test_ndarray.py index 4e4e42bbc85..a27f0fe6aa1 100644 --- a/dpnp/tests/test_ndarray.py +++ b/dpnp/tests/test_ndarray.py @@ -9,6 +9,9 @@ assert_raises_regex, ) +# TODO: revert to `import dpctl.tensor...` +# when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt_ext import dpnp from .helper import ( @@ -407,7 +410,7 @@ def test_error(self): class TestUsmNdarrayProtocol: def test_basic(self): a = dpnp.arange(256, dtype=dpnp.int64) - usm_a = dpt.asarray(a) + usm_a = dpt_ext.asarray(a) assert a.sycl_queue == usm_a.sycl_queue assert a.usm_type == usm_a.usm_type diff --git a/dpnp/tests/test_search.py b/dpnp/tests/test_search.py index 64c4eb75f90..05bc56b11d0 100644 --- a/dpnp/tests/test_search.py +++ b/dpnp/tests/test_search.py @@ -3,6 +3,9 @@ import pytest from numpy.testing import assert_array_equal, assert_equal, assert_raises +# TODO: revert to `import dpctl.tensor...` +# when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt_ext import dpnp from .helper import ( @@ -36,7 +39,7 @@ def test_out(self, func): assert_array_equal(result, expected) # out is usm_ndarray - dpt_out = dpt.empty(expected.shape, dtype=expected.dtype) + dpt_out = dpt_ext.empty(expected.shape, dtype=expected.dtype) result = getattr(dpnp, func)(ia, axis=0, out=dpt_out) assert dpt_out is result.get_array() assert_array_equal(result, expected) diff --git a/dpnp/tests/test_statistics.py b/dpnp/tests/test_statistics.py index cf436087b60..fe8848b6c85 100644 --- a/dpnp/tests/test_statistics.py +++ b/dpnp/tests/test_statistics.py @@ -1,5 +1,4 @@ import dpctl -import dpctl.tensor as dpt import numpy import pytest from numpy.testing import ( @@ -9,6 +8,9 @@ assert_raises_regex, ) +# TODO: revert to `import dpctl.tensor...` +# when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt import dpnp from .helper import ( diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py index d1853579036..a9c076a7c47 100644 --- a/dpnp/tests/test_sycl_queue.py +++ b/dpnp/tests/test_sycl_queue.py @@ -2,12 +2,14 @@ import tempfile import dpctl -import dpctl.tensor as dpt import numpy import pytest from dpctl.utils import ExecutionPlacementError from numpy.testing import assert_array_equal, assert_raises +# TODO: revert to `import dpctl.tensor...` +# when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt import dpnp import dpnp.linalg from dpnp.dpnp_array import dpnp_array diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py index 4fc0f2b958f..8f8efd1cdd1 100644 --- a/dpnp/tests/test_usm_type.py +++ b/dpnp/tests/test_usm_type.py @@ -2,11 +2,13 @@ import tempfile from math import prod -import dpctl.tensor as dpt import dpctl.utils as du import numpy import pytest +# TODO: revert to `import dpctl.tensor...` +# when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt import dpnp from dpnp.dpnp_utils import get_usm_allocations diff --git a/dpnp/tests/test_utils.py b/dpnp/tests/test_utils.py index eef9132e5b5..ddbd267c210 100644 --- a/dpnp/tests/test_utils.py +++ b/dpnp/tests/test_utils.py @@ -1,7 +1,9 @@ -import dpctl.tensor as dpt import numpy import pytest +# TODO: revert to `import dpctl.tensor...` +# when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt import dpnp