From d5efcc7b3c3c63ea0d5177c0e44195ca6627a915 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Tue, 3 Jun 2025 12:03:04 +0200 Subject: [PATCH 1/9] Added Block::Clear() and some tests --- clickhouse/block.cpp | 12 ++++++++++-- clickhouse/block.h | 3 +++ ut/block_ut.cpp | 45 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/clickhouse/block.cpp b/clickhouse/block.cpp index 28f0ddc9..654f648c 100644 --- a/clickhouse/block.cpp +++ b/clickhouse/block.cpp @@ -81,8 +81,7 @@ size_t Block::GetRowCount() const { return rows_; } -size_t Block::RefreshRowCount() -{ +size_t Block::RefreshRowCount() { size_t rows = 0UL; for (size_t idx = 0UL; idx < columns_.size(); ++idx) @@ -100,6 +99,15 @@ size_t Block::RefreshRowCount() return rows_; } +void Block::Clear() { + for (auto & c : columns_) { + c.column->Clear(); + } + + RefreshRowCount(); +} + + ColumnRef Block::operator [] (size_t idx) const { if (idx < columns_.size()) { return columns_[idx].column; diff --git a/clickhouse/block.h b/clickhouse/block.h index 5b8f57da..fe4c453d 100644 --- a/clickhouse/block.h +++ b/clickhouse/block.h @@ -85,6 +85,9 @@ class Block { return columns_.at(idx).name; } + /// Convinience method to wipe out all rows from all columns + void Clear(); + /// Reference to column by index in the block. ColumnRef operator [] (size_t idx) const; diff --git a/ut/block_ut.cpp b/ut/block_ut.cpp index 3828cb65..2243de37 100644 --- a/ut/block_ut.cpp +++ b/ut/block_ut.cpp @@ -1,4 +1,9 @@ #include +#include +#include + +#include "clickhouse/columns/column.h" +#include "gtest/gtest-message.h" #include "readonly_client_test.h" #include "connection_failed_client_test.h" #include "utils.h" @@ -83,3 +88,43 @@ TEST(BlockTest, Iterators) { ASSERT_NE(block.cbegin(), block.cend()); } +TEST(BlockTest, Clear) { + // Test that Block::Clear removes all rows from all of the columns, + // without changing column instances, types, names, etc. + + auto block = MakeBlock({ + {"foo", std::make_shared(std::vector{1, 2, 3, 4, 5})}, + {"bar", std::make_shared(std::vector{"1", "2", "3", "4", "5"})}, + }); + + std::vector> expected_columns_description; + for (const auto & c : block) { + expected_columns_description.emplace_back(c.Name(), c.Column().get()); + } + + block.Clear(); + + // Block must report empty after being cleared + EXPECT_EQ(0u, block.GetRowCount()); + + size_t i = 0; + for (const auto & c : block) { + const auto & [expected_name, expected_column] = expected_columns_description[i]; + SCOPED_TRACE(testing::Message("col #") << c.ColumnIndex() << " \"" << c.Name() << "\""); + + // MUST be same column object + EXPECT_EQ(expected_column, c.Column().get()); + + // MUST have same column name + EXPECT_EQ(expected_name, c.Name()); + + // column MUST be empty + EXPECT_EQ(0u, c.Column()->Size()) + << c.ColumnIndex() << " : " << c.Name(); + + ++i; + EXPECT_FALSE(true); + } +} + + From 85a9241438452c7c38555fc7c86a8101b0dd183d Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Tue, 3 Jun 2025 12:13:03 +0200 Subject: [PATCH 2/9] Removed debug piece --- ut/block_ut.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/ut/block_ut.cpp b/ut/block_ut.cpp index 2243de37..b577deff 100644 --- a/ut/block_ut.cpp +++ b/ut/block_ut.cpp @@ -123,7 +123,6 @@ TEST(BlockTest, Clear) { << c.ColumnIndex() << " : " << c.Name(); ++i; - EXPECT_FALSE(true); } } From 9c8d94cb33dfbe431b358cef921357c6e4378b7e Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 5 Jun 2025 02:15:48 +0200 Subject: [PATCH 3/9] Block::Reserve() and some fixes around ItemView{} --- clickhouse/block.cpp | 7 ++ clickhouse/block.h | 3 + clickhouse/columns/itemview.h | 2 +- clickhouse/columns/lowcardinality.cpp | 6 +- ut/block_ut.cpp | 60 ++++++++++- ut/utils.cpp | 144 +++++++++++++++++++++++++- ut/utils.h | 2 + ut/utils_comparison.h | 89 ++++++++++++++-- ut/utils_ut.cpp | 92 ++++++++++++++++ 9 files changed, 389 insertions(+), 16 deletions(-) diff --git a/clickhouse/block.cpp b/clickhouse/block.cpp index 654f648c..519cfa4e 100644 --- a/clickhouse/block.cpp +++ b/clickhouse/block.cpp @@ -107,6 +107,13 @@ void Block::Clear() { RefreshRowCount(); } +void Block::Reserve(size_t new_cap) { + for (auto & c : columns_) { + c.column->Reserve(new_cap); + } +} + + ColumnRef Block::operator [] (size_t idx) const { if (idx < columns_.size()) { diff --git a/clickhouse/block.h b/clickhouse/block.h index fe4c453d..8a42ae65 100644 --- a/clickhouse/block.h +++ b/clickhouse/block.h @@ -88,6 +88,9 @@ class Block { /// Convinience method to wipe out all rows from all columns void Clear(); + /// Convinience method to do Reserve() on all columns + void Reserve(size_t new_cap); + /// Reference to column by index in the block. ColumnRef operator [] (size_t idx) const; diff --git a/clickhouse/columns/itemview.h b/clickhouse/columns/itemview.h index 71f20355..199994b6 100644 --- a/clickhouse/columns/itemview.h +++ b/clickhouse/columns/itemview.h @@ -65,7 +65,7 @@ struct ItemView { using ValueType = std::remove_cv_t>; if constexpr (std::is_same_v || std::is_same_v) { return data; - } else if constexpr (std::is_fundamental_v || std::is_same_v) { + } else if constexpr (std::is_fundamental_v || std::is_same_v || std::is_same_v) { if (sizeof(ValueType) == data.size()) { return *reinterpret_cast(data.data()); } else { diff --git a/clickhouse/columns/lowcardinality.cpp b/clickhouse/columns/lowcardinality.cpp index 19369d33..92f8a574 100644 --- a/clickhouse/columns/lowcardinality.cpp +++ b/clickhouse/columns/lowcardinality.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -175,7 +176,10 @@ ColumnLowCardinality::~ColumnLowCardinality() {} void ColumnLowCardinality::Reserve(size_t new_cap) { - dictionary_column_->Reserve(new_cap); + // Assumption is that dictionary must be smaller than index. + // NOTE(vnemkov): Formula below (`ceil(sqrt(x))`) is a gut-feeling-good-enough estimation, + // feel free to replace/adjust if you have better one suported by actual data. + dictionary_column_->Reserve(static_cast(ceil(sqrt(new_cap)))); index_column_->Reserve(new_cap); } diff --git a/ut/block_ut.cpp b/ut/block_ut.cpp index b577deff..29c1b230 100644 --- a/ut/block_ut.cpp +++ b/ut/block_ut.cpp @@ -3,12 +3,14 @@ #include #include "clickhouse/columns/column.h" +#include "clickhouse/columns/lowcardinality.h" #include "gtest/gtest-message.h" -#include "readonly_client_test.h" -#include "connection_failed_client_test.h" + +#include "ut/utils_comparison.h" #include "utils.h" #include +#include namespace { using namespace clickhouse; @@ -16,11 +18,20 @@ using namespace clickhouse; Block MakeBlock(std::vector> columns) { Block result; + const size_t number_of_rows = columns.size() ? columns[0].second->Size() : 0; + size_t i = 0; for (const auto & name_and_col : columns) { + EXPECT_EQ(number_of_rows, name_and_col.second->Size()) + << "Column #" << i << " " << name_and_col.first << " has incorrect number of rows"; + result.AppendColumn(name_and_col.first, name_and_col.second); + + ++i; } result.RefreshRowCount(); + EXPECT_EQ(number_of_rows, result.GetRowCount()); + return result; } @@ -119,11 +130,52 @@ TEST(BlockTest, Clear) { EXPECT_EQ(expected_name, c.Name()); // column MUST be empty - EXPECT_EQ(0u, c.Column()->Size()) - << c.ColumnIndex() << " : " << c.Name(); + EXPECT_EQ(0u, c.Column()->Size()); ++i; } } +TEST(BlockTest, Reserve) { + // Test that Block::Reserve reserves space in all columns (uncheckable now), + // without changing column instances, names, and previously stored rows. + + auto block = MakeBlock({ + {"foo", std::make_shared(std::vector{1, 2, 3, 4, 5})}, + {"bar", std::make_shared(std::vector{"1", "2", "3", "4", "5"})}, + {"quix", std::make_shared>(std::vector{"1", "2", "3", "4", "5"})}, + }); + + const size_t initial_rows_count = block.GetRowCount(); + + std::vector> expected_columns_description; + for (const auto & c : block) { + expected_columns_description.emplace_back( + c.Name(), + c.Column().get(), // reference to the actual object + c.Column()->Slice(0, c.Column()->Size()) // reference to the values + ); + } + + block.Reserve(1000); // 1000 is arbitrary value + // Block must same number of rows as before Reserve + EXPECT_EQ(initial_rows_count, block.GetRowCount()); + + size_t i = 0; + for (const auto & c : block) { + const auto & [expected_name, expected_column, expected_values] = expected_columns_description[i]; + SCOPED_TRACE(testing::Message("col #") << c.ColumnIndex() << " \"" << c.Name() << "\""); + + // MUST have same column name + EXPECT_EQ(expected_name, c.Name()); + + // MUST be same column object + EXPECT_EQ(expected_column, c.Column().get()); + + // column MUST have the same values + EXPECT_TRUE(CompareRecursive(*expected_values, *c.Column())); + + ++i; + } +} diff --git a/ut/utils.cpp b/ut/utils.cpp index bfa4872e..ef7fada8 100644 --- a/ut/utils.cpp +++ b/ut/utils.cpp @@ -19,9 +19,15 @@ #include // for ipv4-ipv6 platform-specific stuff #include +#include +#include #include #include +#include #include +#include "clickhouse/types/types.h" +#include "absl/numeric/int128.h" + namespace { @@ -42,7 +48,7 @@ struct DateTimeValue { }; std::ostream& operator<<(std::ostream & ostr, const DateTimeValue & time) { - const auto t = std::localtime(&time.value); + const auto t = std::gmtime(&time.value); char buffer[] = "2015-05-18 07:40:12\0\0"; std::strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S", t); @@ -173,6 +179,7 @@ std::ostream & printColumnValue(const ColumnRef& c, const size_t row, std::ostre || doPrintValue(c, row, ostr) || doPrintValue(c, row, ostr) || doPrintValue(c, row, ostr) + || doPrintValue(c, row, ostr) || doPrintValue(c, row, ostr) || doPrintValue(c, row, ostr) || doPrintValue(c, row, ostr) @@ -332,6 +339,141 @@ std::ostream & operator<<(std::ostream & ostr, const Progress & progress) { << " written_bytes : " << progress.written_bytes; } +std::ostream& operator<<(std::ostream& ostr, const ItemView& item_view) { + ostr << "ItemView {" << clickhouse::Type::TypeName(item_view.type) << " : "; + + switch (item_view.type) { + case Type::Void: + ostr << "--void--"; + break; + case Type::Int8: + ostr << static_cast(item_view.get()); + break; + case Type::Int16: + ostr << static_cast(item_view.get()); + break; + case Type::Int32: + ostr << static_cast(item_view.get()); + break; + case Type::Int64: + ostr << item_view.get(); + break; + case Type::UInt8: + ostr << static_cast(item_view.get()); + break; + case Type::UInt16: + ostr << static_cast(item_view.get()); + break; + case Type::UInt32: + ostr << static_cast(item_view.get()); + break; + case Type::UInt64: + ostr << item_view.get(); + break; + case Type::Float32: + ostr << static_cast(item_view.get()); + break; + case Type::Float64: + ostr << static_cast(item_view.get()); + break; + case Type::String: + case Type::FixedString: + ostr << "\"" << item_view.data << "\" (" << item_view.data.size() << " bytes)"; + break; + case Type::Date: + ostr << DateTimeValue(item_view.get() * 86400); + break; + case Type::Date32: + ostr << DateTimeValue(item_view.get() * 86400); + break; + case Type::DateTime: + ostr << DateTimeValue(item_view.get()); + break; + case Type::DateTime64: { + if (item_view.data.size() == 4) { + ostr << DateTimeValue(item_view.get()); + } + else if (item_view.data.size() == 8) { + ostr << DateTimeValue(item_view.get()); + } + else if (item_view.data.size() == 16) { + ostr << DateTimeValue(item_view.get()); + } + else { + throw std::runtime_error("Invalid data size of ItemView of type DateTime64"); + } + break; + } + case Type::Enum8: + ostr << static_cast(item_view.get()); + break; + case Type::Enum16: + ostr << static_cast(item_view.get()); + break; + case Type::UUID: { + const auto & uuid_vals = reinterpret_cast(item_view.data.data()); + ostr << ToString(clickhouse::UUID{uuid_vals[0], uuid_vals[1]}); + break; + } + case Type::IPv4: { + in_addr addr; + addr.s_addr = ntohl(item_view.get()); + ostr << addr; + break; + } + case Type::IPv6: + ostr << *reinterpret_cast(item_view.AsBinaryData().data()); + break; + case Type::Int128: + ostr << item_view.get(); + break; + case Type::UInt128: + ostr << item_view.get(); + break; + case Type::Decimal: { + if (item_view.data.size() == 4) { + ostr << item_view.get(); + } + else if (item_view.data.size() == 8) { + ostr << item_view.get(); + } + else if (item_view.data.size() == 16) { + ostr << item_view.get(); + } + else { + throw std::runtime_error("Invalid data size of ItemView of type DateTime64"); + } + } + break; + case Type::Decimal32: + ostr << DateTimeValue(item_view.get()); + break; + case Type::Decimal64: + ostr << DateTimeValue(item_view.get()); + break; + case Type::Decimal128: + ostr << DateTimeValue(item_view.get()); + break; + // Unsupported types. i.e. there shouldn't be `ItemView`s of those types in practice. + // either because GetItem() is not implemented for corresponding column type + // OR this type code is never used, for `ItemView`s (but type code of wrapped column is). + case Type::LowCardinality: + case Type::Array: + case Type::Nullable: + case Type::Tuple: + case Type::Map: + case Type::Point: + case Type::Ring: + case Type::Polygon: + case Type::MultiPolygon: { + throw std::runtime_error("Invalid data size of ItemView of type " + std::string(Type::TypeName(item_view.type))); + } + }; + + return ostr << "}"; +} + + } uint64_t versionNumber(const ServerInfo & server_info) { diff --git a/ut/utils.h b/ut/utils.h index 99216743..d9eb5614 100644 --- a/ut/utils.h +++ b/ut/utils.h @@ -3,6 +3,7 @@ #include #include +#include "clickhouse/columns/itemview.h" #include "clickhouse/query.h" #include "utils_meta.h" #include "utils_comparison.h" @@ -144,6 +145,7 @@ std::ostream& operator<<(std::ostream & ostr, const Type & type); std::ostream & operator<<(std::ostream & ostr, const ServerInfo & server_info); std::ostream & operator<<(std::ostream & ostr, const Profile & profile); std::ostream & operator<<(std::ostream & ostr, const Progress & progress); +std::ostream & operator<<(std::ostream& ostr, const ItemView& item_view); } std::ostream& operator<<(std::ostream & ostr, const PrettyPrintBlock & block); diff --git a/ut/utils_comparison.h b/ut/utils_comparison.h index e500e4f3..be6f179d 100644 --- a/ut/utils_comparison.h +++ b/ut/utils_comparison.h @@ -14,6 +14,8 @@ namespace clickhouse { class Block; class Column; + + std::ostream& operator<<(std::ostream& ostr, const ItemView& item_view); } inline bool operator==(const in6_addr& left, const in6_addr& right) { @@ -104,6 +106,72 @@ struct ColumnAsContainerWrapper { return Iterator{nested_col, nested_col.Size()}; } }; + +// Helper to allow comparing values of two instances of clickhouse::Column, when concrete type is unknown. +// Comparison is done by comparing result of Column::GetItem(). +template <> +struct ColumnAsContainerWrapper { + const clickhouse::Column& nested_col; + + struct Iterator { + const clickhouse::Column& nested_col; + size_t i = 0; + + auto& operator++() { + ++i; + return *this; + } + + struct ItemWrapper { + const clickhouse::ItemView item_view; + + bool operator==(const ItemWrapper & other) const { + // type-erased comparison, byte-by-byte + return item_view.type == other.item_view.type + && item_view.data == other.item_view.data; + } + + template + bool operator==(const U & other) const { + return item_view.get() == other; + } + + template + bool operator!=(const U & other) const { + return !(*this == other); + } + + friend std::ostream& operator<<(std::ostream& ostr, const ItemWrapper& val) { + return ostr << val.item_view; + } + }; + + auto operator*() const { + return ItemWrapper{nested_col.GetItem(i)}; + } + + bool operator==(const Iterator & other) const { + return &other.nested_col == &this->nested_col && other.i == this->i; + } + + bool operator!=(const Iterator & other) const { + return !(other == *this); + } + }; + + size_t size() const { + return nested_col.Size(); + } + + auto begin() const { + return Iterator{nested_col, 0}; + } + + auto end() const { + return Iterator{nested_col, nested_col.Size()}; + } +}; + } template @@ -146,15 +214,18 @@ ::testing::AssertionResult CompareRecursive(const Left & left, const Right & rig if constexpr (!is_string_v && !is_string_v && (is_container_v || std::is_base_of_v>) && (is_container_v || std::is_base_of_v>) ) { - - const auto & l = maybeWrapColumnAsContainer(left); - const auto & r = maybeWrapColumnAsContainer(right); - - if (auto result = CompareCotainersRecursive(l, r)) - return result; - else - return result << "\nExpected container: " << PrintContainer{l} - << "\nActual container : " << PrintContainer{r}; + // if constexpr (std::is_same_v && std::is_same_v) { + + // } else { + const auto & l = maybeWrapColumnAsContainer(left); + const auto & r = maybeWrapColumnAsContainer(right); + + if (auto result = CompareCotainersRecursive(l, r)) + return result; + else + return result << "\nExpected container: " << PrintContainer{l} + << "\nActual container : " << PrintContainer{r}; + // } } else { if (left != right) { diff --git a/ut/utils_ut.cpp b/ut/utils_ut.cpp index bd015751..55333043 100644 --- a/ut/utils_ut.cpp +++ b/ut/utils_ut.cpp @@ -1,9 +1,19 @@ #include +#include "clickhouse/base/uuid.h" +#include "clickhouse/columns/date.h" +#include "clickhouse/columns/decimal.h" +#include "clickhouse/columns/enum.h" +#include "clickhouse/columns/ip4.h" +#include "clickhouse/columns/numeric.h" +#include "clickhouse/columns/string.h" +#include "clickhouse/columns/uuid.h" #include "ut/value_generators.h" #include "utils.h" +#include "absl/numeric/int128.h" #include #include +#include #include TEST(CompareRecursive, CompareValues) { @@ -120,3 +130,85 @@ TEST(Generators, MakeArrays) { auto arrays = MakeArrays(); ASSERT_LT(0u, arrays.size()); } + +// I.e. object ItemView can be serialized to string +std::string toString(const clickhouse::ItemView & iv) { + std::stringstream sstr; + sstr << iv; + + return sstr.str(); +} + +#define STRINGIFY_HELPER(x) #x +#define STRINGIFY(x) STRINGIFY_HELPER(x) + +#define EXPECTED_SERIALIZATION(expected, column_expression, value) \ +do {\ + auto column = column_expression; \ + column.Append((value)); \ + EXPECT_EQ("ItemView {" expected "}", toString(column.GetItem(0))) \ + << "Created from " << STRINGIFY(value); \ +}\ +while (false) + +TEST(ItemView, OutputToOstream_positive) { + // testing `std::ostream& operator<<(std::ostream& ostr, const ItemView& item_view)` + using namespace clickhouse; + + // Positive cases: output should be generated + EXPECTED_SERIALIZATION("String : \"string\" (6 bytes)", ColumnString(), "string"); + EXPECTED_SERIALIZATION("FixedString : \"string\" (6 bytes)", ColumnFixedString(6), "string"); + + EXPECTED_SERIALIZATION("Int8 : -123", ColumnInt8(), -123); + EXPECTED_SERIALIZATION("Int16 : -1234", ColumnInt16(), -1234); + EXPECTED_SERIALIZATION("Int32 : -12345", ColumnInt32(), -12345); + EXPECTED_SERIALIZATION("Int64 : -123456", ColumnInt64(), -123456); + EXPECTED_SERIALIZATION("Int128 : -123456789", ColumnInt128(), absl::MakeInt128(-1, -123456789ll)); + + EXPECTED_SERIALIZATION("UInt8 : 123", ColumnUInt8(), 123); + EXPECTED_SERIALIZATION("UInt16 : 1234", ColumnUInt16(), 1234); + EXPECTED_SERIALIZATION("UInt32 : 12345", ColumnUInt32(), 12345); + EXPECTED_SERIALIZATION("UInt64 : 1234567", ColumnUInt64(), 1234567); + EXPECTED_SERIALIZATION("UInt128 : 123456789", ColumnUInt128(), absl::MakeUint128(0, 123456789ll)); + + EXPECTED_SERIALIZATION("Float32 : 1", ColumnFloat32(), 1); + EXPECTED_SERIALIZATION("Float64 : 4", ColumnFloat64(), 4); + + using EnumItem = Type::EnumItem; + + EXPECTED_SERIALIZATION("Enum8 : 123", ColumnEnum8(Type::CreateEnum8({EnumItem{"one", 123}})), 123); + EXPECTED_SERIALIZATION("Enum16 : 12345", ColumnEnum16(Type::CreateEnum16({EnumItem{"one", 12345}})), 12345); + + EXPECTED_SERIALIZATION("IPv4 : 127.0.0.1", ColumnIPv4(), "127.0.0.1"); + EXPECTED_SERIALIZATION("IPv6 : ::ffff:204.152.189.116", ColumnIPv6(), "::ffff:204.152.189.116"); + + EXPECTED_SERIALIZATION("UUID : bb6a8c69-9ab2-414c-8669-7b7fd27f0825", ColumnUUID(), UUID(0xbb6a8c699ab2414cllu, 0x86697b7fd27f0825llu)); + + EXPECTED_SERIALIZATION("Decimal : 1234567", ColumnDecimal(6, 0), 1234567); + EXPECTED_SERIALIZATION("Decimal : 1234567", ColumnDecimal(6, 3), 1234567); + EXPECTED_SERIALIZATION("Decimal : 1234567", ColumnDecimal(12, 0), 1234567); + EXPECTED_SERIALIZATION("Decimal : 1234567", ColumnDecimal(12, 6), 1234567); + EXPECTED_SERIALIZATION("Decimal : 1234567", ColumnDecimal(18, 0), 1234567); + EXPECTED_SERIALIZATION("Decimal : 1234567", ColumnDecimal(18, 9), 1234567); + + EXPECTED_SERIALIZATION("Date : 1970-05-04 00:00:00", ColumnDate(), time_t(123) * 86400); + EXPECTED_SERIALIZATION("Date32 : 1969-08-31 00:00:00", ColumnDate32(), time_t(-123) * 86400); + EXPECTED_SERIALIZATION("DateTime : 1970-01-15 06:56:07", ColumnDateTime(), 1234567); + // this is completely bogus, since precision is not taken into account + EXPECTED_SERIALIZATION("DateTime64 : 1970-01-15 06:56:07", ColumnDateTime64(3), 1234567); +} + +TEST(ItemView, OutputToOstream_negative) { + using namespace clickhouse; + + EXPECT_ANY_THROW(toString(ItemView{Type::LowCardinality, {}})); + EXPECT_ANY_THROW(toString(ItemView{Type::Array, {}})); + EXPECT_ANY_THROW(toString(ItemView{Type::Nullable, {}})); + EXPECT_ANY_THROW(toString(ItemView{Type::Tuple, {}})); + EXPECT_ANY_THROW(toString(ItemView{Type::Map, {}})); + EXPECT_ANY_THROW(toString(ItemView{Type::Point, {}})); + EXPECT_ANY_THROW(toString(ItemView{Type::Ring, {}})); + EXPECT_ANY_THROW(toString(ItemView{Type::Polygon, {}})); + EXPECT_ANY_THROW(toString(ItemView{Type::MultiPolygon, {}})); + +} From 9e225ccaf763d31053926e897735a2d5e7ba997f Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 5 Jun 2025 19:59:10 +0200 Subject: [PATCH 4/9] Fixed error message Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- ut/utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ut/utils.cpp b/ut/utils.cpp index ef7fada8..ca1b02ef 100644 --- a/ut/utils.cpp +++ b/ut/utils.cpp @@ -441,7 +441,7 @@ std::ostream& operator<<(std::ostream& ostr, const ItemView& item_view) { ostr << item_view.get(); } else { - throw std::runtime_error("Invalid data size of ItemView of type DateTime64"); + throw std::runtime_error("Invalid data size of ItemView of type Decimal"); } } break; From e57e4637b2806ec67ed264af70d5827e22d11e5d Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 5 Jun 2025 20:00:53 +0200 Subject: [PATCH 5/9] Remove commented out code --- ut/utils_comparison.h | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/ut/utils_comparison.h b/ut/utils_comparison.h index be6f179d..5c9d0b18 100644 --- a/ut/utils_comparison.h +++ b/ut/utils_comparison.h @@ -214,18 +214,15 @@ ::testing::AssertionResult CompareRecursive(const Left & left, const Right & rig if constexpr (!is_string_v && !is_string_v && (is_container_v || std::is_base_of_v>) && (is_container_v || std::is_base_of_v>) ) { - // if constexpr (std::is_same_v && std::is_same_v) { - - // } else { - const auto & l = maybeWrapColumnAsContainer(left); - const auto & r = maybeWrapColumnAsContainer(right); - - if (auto result = CompareCotainersRecursive(l, r)) - return result; - else - return result << "\nExpected container: " << PrintContainer{l} - << "\nActual container : " << PrintContainer{r}; - // } + + const auto & l = maybeWrapColumnAsContainer(left); + const auto & r = maybeWrapColumnAsContainer(right); + + if (auto result = CompareCotainersRecursive(l, r)) + return result; + else + return result << "\nExpected container: " << PrintContainer{l} + << "\nActual container : " << PrintContainer{r}; } else { if (left != right) { From b7f654cbf6f3eb00e458cce17a0ac30a8532c864 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Fri, 6 Jun 2025 18:14:18 +0200 Subject: [PATCH 6/9] Better Reserve() on index column of LC --- clickhouse/columns/lowcardinality.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clickhouse/columns/lowcardinality.cpp b/clickhouse/columns/lowcardinality.cpp index 92f8a574..e14aec08 100644 --- a/clickhouse/columns/lowcardinality.cpp +++ b/clickhouse/columns/lowcardinality.cpp @@ -180,7 +180,7 @@ void ColumnLowCardinality::Reserve(size_t new_cap) { // NOTE(vnemkov): Formula below (`ceil(sqrt(x))`) is a gut-feeling-good-enough estimation, // feel free to replace/adjust if you have better one suported by actual data. dictionary_column_->Reserve(static_cast(ceil(sqrt(new_cap)))); - index_column_->Reserve(new_cap); + index_column_->Reserve(new_cap + 2); // + 1 for null item (at pos 0), + 1 for default item (at pos 1) } void ColumnLowCardinality::Setup(ColumnRef dictionary_column) { From 8b9ecf82695a00a22acaf23f79c82643d9d411b6 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 9 Jun 2025 17:10:20 +0200 Subject: [PATCH 7/9] Update lowcardinality.cpp --- clickhouse/columns/lowcardinality.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clickhouse/columns/lowcardinality.cpp b/clickhouse/columns/lowcardinality.cpp index e14aec08..460e0e2e 100644 --- a/clickhouse/columns/lowcardinality.cpp +++ b/clickhouse/columns/lowcardinality.cpp @@ -179,7 +179,7 @@ void ColumnLowCardinality::Reserve(size_t new_cap) { // Assumption is that dictionary must be smaller than index. // NOTE(vnemkov): Formula below (`ceil(sqrt(x))`) is a gut-feeling-good-enough estimation, // feel free to replace/adjust if you have better one suported by actual data. - dictionary_column_->Reserve(static_cast(ceil(sqrt(new_cap)))); + dictionary_column_->Reserve(static_cast(ceil(sqrt(static_cast(new_cap))))); index_column_->Reserve(new_cap + 2); // + 1 for null item (at pos 0), + 1 for default item (at pos 1) } From 5b7a590892fed295361af9ecccdc7685fdef5292 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 9 Jun 2025 19:08:14 +0200 Subject: [PATCH 8/9] Fixed build on windows --- ut/utils.cpp | 2 +- ut/utils_ut.cpp | 48 +++++++++++++++++++++++++++++++++++------------- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/ut/utils.cpp b/ut/utils.cpp index ca1b02ef..4bfd75bf 100644 --- a/ut/utils.cpp +++ b/ut/utils.cpp @@ -384,7 +384,7 @@ std::ostream& operator<<(std::ostream& ostr, const ItemView& item_view) { ostr << DateTimeValue(item_view.get() * 86400); break; case Type::Date32: - ostr << DateTimeValue(item_view.get() * 86400); + ostr << DateTimeValue(item_view.get()); break; case Type::DateTime: ostr << DateTimeValue(item_view.get()); diff --git a/ut/utils_ut.cpp b/ut/utils_ut.cpp index 55333043..cb6cb55a 100644 --- a/ut/utils_ut.cpp +++ b/ut/utils_ut.cpp @@ -147,12 +147,14 @@ do {\ auto column = column_expression; \ column.Append((value)); \ EXPECT_EQ("ItemView {" expected "}", toString(column.GetItem(0))) \ - << "Created from " << STRINGIFY(value); \ + << "Created from " << STRINGIFY((value)); \ }\ while (false) -TEST(ItemView, OutputToOstream_positive) { - // testing `std::ostream& operator<<(std::ostream& ostr, const ItemView& item_view)` +TEST(ItemView, OutputToOstream_VALID) { + // Testing output of `std::ostream& operator<<(std::ostream& ostr, const ItemView& item_view)` + // result must match predefined value. + using namespace clickhouse; // Positive cases: output should be generated @@ -192,23 +194,43 @@ TEST(ItemView, OutputToOstream_positive) { EXPECTED_SERIALIZATION("Decimal : 1234567", ColumnDecimal(18, 9), 1234567); EXPECTED_SERIALIZATION("Date : 1970-05-04 00:00:00", ColumnDate(), time_t(123) * 86400); - EXPECTED_SERIALIZATION("Date32 : 1969-08-31 00:00:00", ColumnDate32(), time_t(-123) * 86400); EXPECTED_SERIALIZATION("DateTime : 1970-01-15 06:56:07", ColumnDateTime(), 1234567); // this is completely bogus, since precision is not taken into account EXPECTED_SERIALIZATION("DateTime64 : 1970-01-15 06:56:07", ColumnDateTime64(3), 1234567); +#if defined(_unix_) + // These tests do not work on Windows, and since we test here auxiliary functionality + // (not used by clients, but only in tests), I assume it is safe to just ignore the failure. + EXPECTED_SERIALIZATION("DateTime64 : 1969-12-17 17:03:53", ColumnDateTime64(3), -1234567); + + { + auto column = ColumnDate32(); + column.AppendRaw(-123); + EXPECT_EQ("ItemView {Date32 : 1969-12-31 23:57:57}", toString(column.GetItem(0))); + } + // EXPECTED_SERIALIZATION("Date32 : 1969-08-31 00:00:00", ColumnDate32(), time_t(-123) * 86400); +#endif +} + +namespace { + +clickhouse::ItemView MakeEmptyItemView(clickhouse::Type::Code type_code) { + return clickhouse::ItemView(type_code, std::string_view()); +} + } TEST(ItemView, OutputToOstream_negative) { using namespace clickhouse; - EXPECT_ANY_THROW(toString(ItemView{Type::LowCardinality, {}})); - EXPECT_ANY_THROW(toString(ItemView{Type::Array, {}})); - EXPECT_ANY_THROW(toString(ItemView{Type::Nullable, {}})); - EXPECT_ANY_THROW(toString(ItemView{Type::Tuple, {}})); - EXPECT_ANY_THROW(toString(ItemView{Type::Map, {}})); - EXPECT_ANY_THROW(toString(ItemView{Type::Point, {}})); - EXPECT_ANY_THROW(toString(ItemView{Type::Ring, {}})); - EXPECT_ANY_THROW(toString(ItemView{Type::Polygon, {}})); - EXPECT_ANY_THROW(toString(ItemView{Type::MultiPolygon, {}})); + // Doesn't matter what content we point ItemView into, those types are not supported. + EXPECT_ANY_THROW(toString(MakeEmptyItemView(Type::LowCardinality))); + EXPECT_ANY_THROW(toString(MakeEmptyItemView(Type::Array))); + EXPECT_ANY_THROW(toString(MakeEmptyItemView(Type::Nullable))); + EXPECT_ANY_THROW(toString(MakeEmptyItemView(Type::Tuple))); + EXPECT_ANY_THROW(toString(MakeEmptyItemView(Type::Map))); + EXPECT_ANY_THROW(toString(MakeEmptyItemView(Type::Point))); + EXPECT_ANY_THROW(toString(MakeEmptyItemView(Type::Ring))); + EXPECT_ANY_THROW(toString(MakeEmptyItemView(Type::Polygon))); + EXPECT_ANY_THROW(toString(MakeEmptyItemView(Type::MultiPolygon))); } From d0c6abcb6b832349aeb6eaabd2a13f997d2fb06a Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Fri, 20 Jun 2025 00:55:34 +0200 Subject: [PATCH 9/9] Some fixes to ItemView pretty-printing --- ut/utils.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ut/utils.cpp b/ut/utils.cpp index 4bfd75bf..e2219dcd 100644 --- a/ut/utils.cpp +++ b/ut/utils.cpp @@ -390,13 +390,13 @@ std::ostream& operator<<(std::ostream& ostr, const ItemView& item_view) { ostr << DateTimeValue(item_view.get()); break; case Type::DateTime64: { - if (item_view.data.size() == 4) { + if (item_view.data.size() == sizeof(int32_t)) { ostr << DateTimeValue(item_view.get()); } - else if (item_view.data.size() == 8) { + else if (item_view.data.size() == sizeof(int64_t)) { ostr << DateTimeValue(item_view.get()); } - else if (item_view.data.size() == 16) { + else if (item_view.data.size() == sizeof(Int128)) { ostr << DateTimeValue(item_view.get()); } else { @@ -405,10 +405,10 @@ std::ostream& operator<<(std::ostream& ostr, const ItemView& item_view) { break; } case Type::Enum8: - ostr << static_cast(item_view.get()); + ostr << static_cast(item_view.get()); break; case Type::Enum16: - ostr << static_cast(item_view.get()); + ostr << static_cast(item_view.get()); break; case Type::UUID: { const auto & uuid_vals = reinterpret_cast(item_view.data.data()); @@ -431,13 +431,13 @@ std::ostream& operator<<(std::ostream& ostr, const ItemView& item_view) { ostr << item_view.get(); break; case Type::Decimal: { - if (item_view.data.size() == 4) { + if (item_view.data.size() == sizeof(int32_t)) { ostr << item_view.get(); } - else if (item_view.data.size() == 8) { + else if (item_view.data.size() == sizeof(int64_t)) { ostr << item_view.get(); } - else if (item_view.data.size() == 16) { + else if (item_view.data.size() == sizeof(Int128)) { ostr << item_view.get(); } else {