From c52fb1c5ceb658c434938ddb602f62e3b920bf6a Mon Sep 17 00:00:00 2001 From: Arkadii Kravchuk Date: Thu, 15 Jan 2026 15:25:58 +0200 Subject: [PATCH 1/6] GH-48866: [C++][Gandiva] Truncate subseconds beyond milliseconds in `castTIMESTAMP_utf8` and `castTIME_utf8` --- cpp/src/gandiva/precompiled/time.cc | 19 +++++---- cpp/src/gandiva/precompiled/time_test.cc | 54 +++++++++++++++++------- 2 files changed, 49 insertions(+), 24 deletions(-) diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc index 8bbd0930991..388c0488ccd 100644 --- a/cpp/src/gandiva/precompiled/time.cc +++ b/cpp/src/gandiva/precompiled/time.cc @@ -747,11 +747,12 @@ gdv_timestamp castTIMESTAMP_utf8(int64_t context, const char* input, gdv_int32 l // adjust the milliseconds if (sub_seconds_len > 0) { - if (sub_seconds_len > 3) { - const char* msg = "Invalid millis for timestamp value "; - set_error_for_date(length, input, msg, context); - return 0; + // Truncate to 3 digits (milliseconds precision) if more digits are provided + while (sub_seconds_len > 3) { + ts_fields[TimeFields::kSubSeconds] /= 10; + sub_seconds_len--; } + // Pad with zeros if less than 3 digits while (sub_seconds_len < 3) { ts_fields[TimeFields::kSubSeconds] *= 10; sub_seconds_len++; @@ -867,12 +868,12 @@ gdv_time32 castTIME_utf8(int64_t context, const char* input, int32_t length) { // adjust the milliseconds if (sub_seconds_len > 0) { - if (sub_seconds_len > 3) { - const char* msg = "Invalid millis for time value "; - set_error_for_date(length, input, msg, context); - return 0; + // Truncate to 3 digits (milliseconds precision) if more digits are provided + while (sub_seconds_len > 3) { + time_fields[TimeFields::kSubSeconds - TimeFields::kHours] /= 10; + sub_seconds_len--; } - + // Pad with zeros if less than 3 digits while (sub_seconds_len < 3) { time_fields[TimeFields::kSubSeconds - TimeFields::kHours] *= 10; sub_seconds_len++; diff --git a/cpp/src/gandiva/precompiled/time_test.cc b/cpp/src/gandiva/precompiled/time_test.cc index 0d3b348754a..82b38d1b577 100644 --- a/cpp/src/gandiva/precompiled/time_test.cc +++ b/cpp/src/gandiva/precompiled/time_test.cc @@ -122,15 +122,26 @@ TEST(TestTime, TestCastTimestamp) { "Not a valid time for timestamp value 2000-01-01 00:00:100"); context.Reset(); - EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.0001", 24), 0); - EXPECT_EQ(context.get_error(), - "Invalid millis for timestamp value 2000-01-01 00:00:00.0001"); - context.Reset(); - - EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.1000", 24), 0); - EXPECT_EQ(context.get_error(), - "Invalid millis for timestamp value 2000-01-01 00:00:00.1000"); - context.Reset(); + // Test truncation of subseconds to 3 digits (milliseconds) + // "2000-01-01 00:00:00.0001" should truncate to "2000-01-01 00:00:00.000" + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.0001", 24), + castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.000", 23)); + + // "2000-01-01 00:00:00.1000" should truncate to "2000-01-01 00:00:00.100" + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.1000", 24), + castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.100", 23)); + + // "2000-01-01 00:00:00.123456789" should truncate to "2000-01-01 00:00:00.123" + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.123456789", 29), + castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.123", 23)); + + // "2000-01-01 00:00:00.1999" should truncate to "2000-01-01 00:00:00.199" + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.1999", 24), + castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.199", 23)); + + // "2000-01-01 00:00:00.1994" should truncate to "2000-01-01 00:00:00.199" + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.1994", 24), + castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.199", 23)); } TEST(TestTime, TestCastTimeUtf8) { @@ -166,13 +177,26 @@ TEST(TestTime, TestCastTimeUtf8) { EXPECT_EQ(context.get_error(), "Not a valid time value 00:00:100"); context.Reset(); - EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.0001", 13), 0); - EXPECT_EQ(context.get_error(), "Invalid millis for time value 00:00:00.0001"); - context.Reset(); + // Test truncation of subseconds to 3 digits (milliseconds) + // "00:00:00.0001" should truncate to "00:00:00.000" + EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.0001", 13), + castTIME_utf8(context_ptr, "00:00:00.000", 12)); - EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.1000", 13), 0); - EXPECT_EQ(context.get_error(), "Invalid millis for time value 00:00:00.1000"); - context.Reset(); + // "00:00:00.1000" should truncate to "00:00:00.100" + EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.1000", 13), + castTIME_utf8(context_ptr, "00:00:00.100", 12)); + + // "9:45:30.123456789" should truncate to "9:45:30.123" + EXPECT_EQ(castTIME_utf8(context_ptr, "9:45:30.123456789", 17), + castTIME_utf8(context_ptr, "9:45:30.123", 11)); + + // "00:00:00.1999" should truncate to "00:00:00.199" + EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.1999", 13), + castTIME_utf8(context_ptr, "00:00:00.199", 12)); + + // "00:00:00.1994" should truncate to "00:00:00.199" + EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.1994", 13), + castTIME_utf8(context_ptr, "00:00:00.199", 12)); } #ifndef _WIN32 From 601f7dd5990e095e1db590d8f8427493b1ec5d20 Mon Sep 17 00:00:00 2001 From: Arkadii Kravchuk Date: Thu, 22 Jan 2026 19:02:48 +0200 Subject: [PATCH 2/6] GH-48866: Address comment to factor out common code --- cpp/src/gandiva/precompiled/time.cc | 49 +++++++++++++++-------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc index 388c0488ccd..1a0ac0aae6e 100644 --- a/cpp/src/gandiva/precompiled/time.cc +++ b/cpp/src/gandiva/precompiled/time.cc @@ -566,6 +566,27 @@ bool is_valid_time(const int hours, const int minutes, const int seconds) { seconds < 60; } +// Normalize sub-seconds value to milliseconds precision (3 digits). +// Truncates if more than 3 digits are provided, pads with zeros if fewer than 3 digits +FORCE_INLINE +int32_t normalize_subseconds_to_millis(int32_t subseconds, int32_t num_digits) { + if (num_digits <= 0 || num_digits == 3) { + // No need to adjust + return subseconds; + } + // Calculate the power of 10 adjustment needed + int32_t digit_diff = num_digits - 3; + while (digit_diff > 0) { + subseconds /= 10; + digit_diff--; + } + while (digit_diff < 0) { + subseconds *= 10; + digit_diff++; + } + return subseconds; +} + // MONTHS_BETWEEN returns number of months between dates date1 and date2. // If date1 is later than date2, then the result is positive. // If date1 is earlier than date2, then the result is negative. @@ -746,18 +767,8 @@ gdv_timestamp castTIMESTAMP_utf8(int64_t context, const char* input, gdv_int32 l } // adjust the milliseconds - if (sub_seconds_len > 0) { - // Truncate to 3 digits (milliseconds precision) if more digits are provided - while (sub_seconds_len > 3) { - ts_fields[TimeFields::kSubSeconds] /= 10; - sub_seconds_len--; - } - // Pad with zeros if less than 3 digits - while (sub_seconds_len < 3) { - ts_fields[TimeFields::kSubSeconds] *= 10; - sub_seconds_len++; - } - } + ts_fields[TimeFields::kSubSeconds] = + normalize_subseconds_to_millis(ts_fields[TimeFields::kSubSeconds], sub_seconds_len); // handle timezone if (encountered_zone) { int err = 0; @@ -867,18 +878,8 @@ gdv_time32 castTIME_utf8(int64_t context, const char* input, int32_t length) { } // adjust the milliseconds - if (sub_seconds_len > 0) { - // Truncate to 3 digits (milliseconds precision) if more digits are provided - while (sub_seconds_len > 3) { - time_fields[TimeFields::kSubSeconds - TimeFields::kHours] /= 10; - sub_seconds_len--; - } - // Pad with zeros if less than 3 digits - while (sub_seconds_len < 3) { - time_fields[TimeFields::kSubSeconds - TimeFields::kHours] *= 10; - sub_seconds_len++; - } - } + time_fields[TimeFields::kSubSeconds - TimeFields::kHours] = normalize_subseconds_to_millis( + time_fields[TimeFields::kSubSeconds - TimeFields::kHours], sub_seconds_len); int32_t input_hours = time_fields[TimeFields::kHours - TimeFields::kHours]; int32_t input_minutes = time_fields[TimeFields::kMinutes - TimeFields::kHours]; From 7029372f4f8ad659d1d78279c17dbf44c995baaf Mon Sep 17 00:00:00 2001 From: Arkadii Kravchuk Date: Fri, 23 Jan 2026 18:07:41 +0200 Subject: [PATCH 3/6] GH-48866: Fix code formatting --- cpp/src/gandiva/precompiled/time.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc index 1a0ac0aae6e..1fe951a69a1 100644 --- a/cpp/src/gandiva/precompiled/time.cc +++ b/cpp/src/gandiva/precompiled/time.cc @@ -768,7 +768,7 @@ gdv_timestamp castTIMESTAMP_utf8(int64_t context, const char* input, gdv_int32 l // adjust the milliseconds ts_fields[TimeFields::kSubSeconds] = - normalize_subseconds_to_millis(ts_fields[TimeFields::kSubSeconds], sub_seconds_len); + normalize_subseconds_to_millis(ts_fields[TimeFields::kSubSeconds], sub_seconds_len); // handle timezone if (encountered_zone) { int err = 0; @@ -878,8 +878,9 @@ gdv_time32 castTIME_utf8(int64_t context, const char* input, int32_t length) { } // adjust the milliseconds - time_fields[TimeFields::kSubSeconds - TimeFields::kHours] = normalize_subseconds_to_millis( - time_fields[TimeFields::kSubSeconds - TimeFields::kHours], sub_seconds_len); + time_fields[TimeFields::kSubSeconds - TimeFields::kHours] = + normalize_subseconds_to_millis( + time_fields[TimeFields::kSubSeconds - TimeFields::kHours], sub_seconds_len); int32_t input_hours = time_fields[TimeFields::kHours - TimeFields::kHours]; int32_t input_minutes = time_fields[TimeFields::kMinutes - TimeFields::kHours]; From e57a00d714f09d55724854dcd2e0011e51055ded Mon Sep 17 00:00:00 2001 From: Arkadii Kravchuk Date: Mon, 26 Jan 2026 12:18:19 +0200 Subject: [PATCH 4/6] GH-48866: Use `ARROW_FORCE_INLINE` and hide `normalize_subseconds_to_millis` by static keywoard --- cpp/src/gandiva/precompiled/time.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc index 1fe951a69a1..d60c1c8054a 100644 --- a/cpp/src/gandiva/precompiled/time.cc +++ b/cpp/src/gandiva/precompiled/time.cc @@ -28,6 +28,7 @@ extern "C" { #include "./time_constants.h" #include "./time_fields.h" #include "./types.h" +#include "arrow/util/macros.h" #define MINS_IN_HOUR 60 #define SECONDS_IN_MINUTE 60 @@ -568,8 +569,8 @@ bool is_valid_time(const int hours, const int minutes, const int seconds) { // Normalize sub-seconds value to milliseconds precision (3 digits). // Truncates if more than 3 digits are provided, pads with zeros if fewer than 3 digits -FORCE_INLINE -int32_t normalize_subseconds_to_millis(int32_t subseconds, int32_t num_digits) { +ARROW_FORCE_INLINE +static int32_t normalize_subseconds_to_millis(int32_t subseconds, int32_t num_digits) { if (num_digits <= 0 || num_digits == 3) { // No need to adjust return subseconds; From 05775b514f82853c08051cd9df88f45e16da2351 Mon Sep 17 00:00:00 2001 From: Arkadii Kravchuk Date: Mon, 26 Jan 2026 16:46:18 +0200 Subject: [PATCH 5/6] GH-48866: Add `inline` keywoard to fix the warning --- cpp/src/gandiva/precompiled/time.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc index d60c1c8054a..5af4c43ee3d 100644 --- a/cpp/src/gandiva/precompiled/time.cc +++ b/cpp/src/gandiva/precompiled/time.cc @@ -570,7 +570,8 @@ bool is_valid_time(const int hours, const int minutes, const int seconds) { // Normalize sub-seconds value to milliseconds precision (3 digits). // Truncates if more than 3 digits are provided, pads with zeros if fewer than 3 digits ARROW_FORCE_INLINE -static int32_t normalize_subseconds_to_millis(int32_t subseconds, int32_t num_digits) { +static inline int32_t normalize_subseconds_to_millis(int32_t subseconds, + int32_t num_digits) { if (num_digits <= 0 || num_digits == 3) { // No need to adjust return subseconds; From 58c988d0dea1d26d9c10af11a783ec265e5b52b7 Mon Sep 17 00:00:00 2001 From: Arkadii Kravchuk Date: Tue, 27 Jan 2026 19:04:32 +0200 Subject: [PATCH 6/6] GH-48866: Remove `ARROW_FORCE_INLINE` since force inline is not needed there --- cpp/src/gandiva/precompiled/time.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc index 5af4c43ee3d..e1e9ac44567 100644 --- a/cpp/src/gandiva/precompiled/time.cc +++ b/cpp/src/gandiva/precompiled/time.cc @@ -28,7 +28,6 @@ extern "C" { #include "./time_constants.h" #include "./time_fields.h" #include "./types.h" -#include "arrow/util/macros.h" #define MINS_IN_HOUR 60 #define SECONDS_IN_MINUTE 60 @@ -569,7 +568,6 @@ bool is_valid_time(const int hours, const int minutes, const int seconds) { // Normalize sub-seconds value to milliseconds precision (3 digits). // Truncates if more than 3 digits are provided, pads with zeros if fewer than 3 digits -ARROW_FORCE_INLINE static inline int32_t normalize_subseconds_to_millis(int32_t subseconds, int32_t num_digits) { if (num_digits <= 0 || num_digits == 3) {