From d528db99edb27dfa2d66b0ed5aba47967564cb26 Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Sun, 18 Jan 2026 12:37:39 +0530 Subject: [PATCH 1/4] perf: Optimize signum scalar performance with fast path --- datafusion/functions/benches/signum.rs | 46 +++++++++++++++++++++++++ datafusion/functions/src/math/signum.rs | 30 +++++++++++++++- 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/datafusion/functions/benches/signum.rs b/datafusion/functions/benches/signum.rs index 08a197a60eb75..b34e52d7f2e19 100644 --- a/datafusion/functions/benches/signum.rs +++ b/datafusion/functions/benches/signum.rs @@ -23,6 +23,7 @@ use arrow::{ util::bench_util::create_primitive_array, }; use criterion::{Criterion, criterion_group, criterion_main}; +use datafusion_common::ScalarValue; use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::math::signum; @@ -88,6 +89,51 @@ fn criterion_benchmark(c: &mut Criterion) { ) }) }); + + // Scalar benchmarks (the optimization we added) + let scalar_f32_args = + vec![ColumnarValue::Scalar(ScalarValue::Float32(Some(-42.5)))]; + let scalar_f32_arg_fields = + vec![Field::new("a", DataType::Float32, false).into()]; + let return_field_f32 = Field::new("f", DataType::Float32, false).into(); + + c.bench_function(&format!("signum f32 scalar: {size}"), |b| { + b.iter(|| { + black_box( + signum + .invoke_with_args(ScalarFunctionArgs { + args: scalar_f32_args.clone(), + arg_fields: scalar_f32_arg_fields.clone(), + number_rows: 1, + return_field: Arc::clone(&return_field_f32), + config_options: Arc::clone(&config_options), + }) + .unwrap(), + ) + }) + }); + + let scalar_f64_args = + vec![ColumnarValue::Scalar(ScalarValue::Float64(Some(-42.5)))]; + let scalar_f64_arg_fields = + vec![Field::new("a", DataType::Float64, false).into()]; + let return_field_f64 = Field::new("f", DataType::Float64, false).into(); + + c.bench_function(&format!("signum f64 scalar: {size}"), |b| { + b.iter(|| { + black_box( + signum + .invoke_with_args(ScalarFunctionArgs { + args: scalar_f64_args.clone(), + arg_fields: scalar_f64_arg_fields.clone(), + number_rows: 1, + return_field: Arc::clone(&return_field_f64), + config_options: Arc::clone(&config_options), + }) + .unwrap(), + ) + }) + }); } } diff --git a/datafusion/functions/src/math/signum.rs b/datafusion/functions/src/math/signum.rs index e217088c64c2e..cc29e88a6dd89 100644 --- a/datafusion/functions/src/math/signum.rs +++ b/datafusion/functions/src/math/signum.rs @@ -22,7 +22,7 @@ use arrow::array::{ArrayRef, AsArray}; use arrow::datatypes::DataType::{Float32, Float64}; use arrow::datatypes::{DataType, Float32Type, Float64Type}; -use datafusion_common::{Result, exec_err}; +use datafusion_common::{Result, ScalarValue, exec_err, internal_err}; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::{ ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, @@ -98,6 +98,34 @@ impl ScalarUDFImpl for SignumFunc { } fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + let arg = &args.args[0]; + + // Scalar fast path for float types - avoid array conversion overhead + if let ColumnarValue::Scalar(scalar) = arg { + if scalar.is_null() { + return ColumnarValue::Scalar(ScalarValue::Null) + .cast_to(args.return_type(), None); + } + + match scalar { + ScalarValue::Float64(Some(v)) => { + let result = if *v == 0.0 { 0.0 } else { v.signum() }; + return Ok(ColumnarValue::Scalar(ScalarValue::Float64(Some(result)))); + } + ScalarValue::Float32(Some(v)) => { + let result = if *v == 0.0 { 0.0 } else { v.signum() }; + return Ok(ColumnarValue::Scalar(ScalarValue::Float32(Some(result)))); + } + _ => { + return internal_err!( + "Unexpected scalar type for signum: {:?}", + scalar.data_type() + ); + } + } + } + + // Array path make_scalar_function(signum, vec![])(&args.args) } From 85bef0fc01190976d09a690c60a6bc360d36a98d Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Sun, 18 Jan 2026 22:04:40 +0530 Subject: [PATCH 2/4] suggestion from jeffrey Co-authored-by: Jeffrey Vo --- datafusion/functions/src/math/signum.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions/src/math/signum.rs b/datafusion/functions/src/math/signum.rs index cc29e88a6dd89..093e9e85be2cb 100644 --- a/datafusion/functions/src/math/signum.rs +++ b/datafusion/functions/src/math/signum.rs @@ -98,7 +98,7 @@ impl ScalarUDFImpl for SignumFunc { } fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - let arg = &args.args[0]; + let arg = take_function_args(self.name(), args.args)?; // Scalar fast path for float types - avoid array conversion overhead if let ColumnarValue::Scalar(scalar) = arg { From 67eb64166b3365b1db43eca6c597c95736f9cdcf Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Mon, 19 Jan 2026 09:52:30 +0530 Subject: [PATCH 3/4] remove comments and inline signum function --- datafusion/functions/src/math/signum.rs | 96 +++++++++++-------------- 1 file changed, 42 insertions(+), 54 deletions(-) diff --git a/datafusion/functions/src/math/signum.rs b/datafusion/functions/src/math/signum.rs index 093e9e85be2cb..f4e6ac7237704 100644 --- a/datafusion/functions/src/math/signum.rs +++ b/datafusion/functions/src/math/signum.rs @@ -18,10 +18,11 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, AsArray}; +use arrow::array::AsArray; use arrow::datatypes::DataType::{Float32, Float64}; use arrow::datatypes::{DataType, Float32Type, Float64Type}; +use datafusion_common::utils::take_function_args; use datafusion_common::{Result, ScalarValue, exec_err, internal_err}; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::{ @@ -30,8 +31,6 @@ use datafusion_expr::{ }; use datafusion_macros::user_doc; -use crate::utils::make_scalar_function; - #[user_doc( doc_section(label = "Math Functions"), description = r#"Returns the sign of a number. @@ -98,35 +97,51 @@ impl ScalarUDFImpl for SignumFunc { } fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - let arg = take_function_args(self.name(), args.args)?; - - // Scalar fast path for float types - avoid array conversion overhead - if let ColumnarValue::Scalar(scalar) = arg { - if scalar.is_null() { - return ColumnarValue::Scalar(ScalarValue::Null) - .cast_to(args.return_type(), None); - } + let return_type = args.return_type().clone(); + let [arg] = take_function_args(self.name(), args.args)?; - match scalar { - ScalarValue::Float64(Some(v)) => { - let result = if *v == 0.0 { 0.0 } else { v.signum() }; - return Ok(ColumnarValue::Scalar(ScalarValue::Float64(Some(result)))); + match arg { + ColumnarValue::Scalar(scalar) => { + if scalar.is_null() { + return ColumnarValue::Scalar(ScalarValue::Null) + .cast_to(&return_type, None); } - ScalarValue::Float32(Some(v)) => { - let result = if *v == 0.0 { 0.0 } else { v.signum() }; - return Ok(ColumnarValue::Scalar(ScalarValue::Float32(Some(result)))); - } - _ => { - return internal_err!( - "Unexpected scalar type for signum: {:?}", - scalar.data_type() - ); + + match scalar { + ScalarValue::Float64(Some(v)) => { + let result = if v == 0.0 { 0.0 } else { v.signum() }; + Ok(ColumnarValue::Scalar(ScalarValue::Float64(Some(result)))) + } + ScalarValue::Float32(Some(v)) => { + let result = if v == 0.0 { 0.0 } else { v.signum() }; + Ok(ColumnarValue::Scalar(ScalarValue::Float32(Some(result)))) + } + _ => { + internal_err!( + "Unexpected scalar type for signum: {:?}", + scalar.data_type() + ) + } } } + ColumnarValue::Array(array) => match array.data_type() { + Float64 => Ok(ColumnarValue::Array(Arc::new( + array.as_primitive::().unary::<_, Float64Type>( + |x: f64| { + if x == 0.0 { 0.0 } else { x.signum() } + }, + ), + ))), + Float32 => Ok(ColumnarValue::Array(Arc::new( + array.as_primitive::().unary::<_, Float32Type>( + |x: f32| { + if x == 0.0 { 0.0 } else { x.signum() } + }, + ), + ))), + other => exec_err!("Unsupported data type {other:?} for function signum"), + }, } - - // Array path - make_scalar_function(signum, vec![])(&args.args) } fn documentation(&self) -> Option<&Documentation> { @@ -134,33 +149,6 @@ impl ScalarUDFImpl for SignumFunc { } } -/// signum SQL function -fn signum(args: &[ArrayRef]) -> Result { - match args[0].data_type() { - Float64 => Ok(Arc::new( - args[0] - .as_primitive::() - .unary::<_, Float64Type>( - |x: f64| { - if x == 0_f64 { 0_f64 } else { x.signum() } - }, - ), - ) as ArrayRef), - - Float32 => Ok(Arc::new( - args[0] - .as_primitive::() - .unary::<_, Float32Type>( - |x: f32| { - if x == 0_f32 { 0_f32 } else { x.signum() } - }, - ), - ) as ArrayRef), - - other => exec_err!("Unsupported data type {other:?} for function signum"), - } -} - #[cfg(test)] mod test { use std::sync::Arc; From 3975661897355104eb4c52ec4bea914e9ed8b93a Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Mon, 19 Jan 2026 11:55:30 +0530 Subject: [PATCH 4/4] use internal error --- datafusion/functions/src/math/signum.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/datafusion/functions/src/math/signum.rs b/datafusion/functions/src/math/signum.rs index f4e6ac7237704..8a3769a12f294 100644 --- a/datafusion/functions/src/math/signum.rs +++ b/datafusion/functions/src/math/signum.rs @@ -23,7 +23,7 @@ use arrow::datatypes::DataType::{Float32, Float64}; use arrow::datatypes::{DataType, Float32Type, Float64Type}; use datafusion_common::utils::take_function_args; -use datafusion_common::{Result, ScalarValue, exec_err, internal_err}; +use datafusion_common::{Result, ScalarValue, internal_err}; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::{ ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, @@ -139,7 +139,9 @@ impl ScalarUDFImpl for SignumFunc { }, ), ))), - other => exec_err!("Unsupported data type {other:?} for function signum"), + other => { + internal_err!("Unsupported data type {other:?} for function signum") + } }, } }