diff --git a/clickhouse/block.cpp b/clickhouse/block.cpp index 28f0ddc9..519cfa4e 100644 --- a/clickhouse/block.cpp +++ b/clickhouse/block.cpp @@ -81,8 +81,7 @@ size_t Block::GetRowCount() const { return rows_; } -size_t Block::RefreshRowCount() -{ +size_t Block::RefreshRowCount() { size_t rows = 0UL; for (size_t idx = 0UL; idx < columns_.size(); ++idx) @@ -100,6 +99,22 @@ size_t Block::RefreshRowCount() return rows_; } +void Block::Clear() { + for (auto & c : columns_) { + c.column->Clear(); + } + + RefreshRowCount(); +} + +void Block::Reserve(size_t new_cap) { + for (auto & c : columns_) { + c.column->Reserve(new_cap); + } +} + + + ColumnRef Block::operator [] (size_t idx) const { if (idx < columns_.size()) { return columns_[idx].column; diff --git a/clickhouse/block.h b/clickhouse/block.h index 5b8f57da..8a42ae65 100644 --- a/clickhouse/block.h +++ b/clickhouse/block.h @@ -85,6 +85,12 @@ class Block { return columns_.at(idx).name; } + /// Convinience method to wipe out all rows from all columns + void Clear(); + + /// Convinience method to do Reserve() on all columns + void Reserve(size_t new_cap); + /// Reference to column by index in the block. ColumnRef operator [] (size_t idx) const; diff --git a/clickhouse/columns/itemview.h b/clickhouse/columns/itemview.h index 71f20355..199994b6 100644 --- a/clickhouse/columns/itemview.h +++ b/clickhouse/columns/itemview.h @@ -65,7 +65,7 @@ struct ItemView { using ValueType = std::remove_cv_t>; if constexpr (std::is_same_v || std::is_same_v) { return data; - } else if constexpr (std::is_fundamental_v || std::is_same_v) { + } else if constexpr (std::is_fundamental_v || std::is_same_v || std::is_same_v) { if (sizeof(ValueType) == data.size()) { return *reinterpret_cast(data.data()); } else { diff --git a/clickhouse/columns/lowcardinality.cpp b/clickhouse/columns/lowcardinality.cpp index 19369d33..460e0e2e 100644 --- a/clickhouse/columns/lowcardinality.cpp +++ b/clickhouse/columns/lowcardinality.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -175,8 +176,11 @@ ColumnLowCardinality::~ColumnLowCardinality() {} void ColumnLowCardinality::Reserve(size_t new_cap) { - dictionary_column_->Reserve(new_cap); - index_column_->Reserve(new_cap); + // Assumption is that dictionary must be smaller than index. + // NOTE(vnemkov): Formula below (`ceil(sqrt(x))`) is a gut-feeling-good-enough estimation, + // feel free to replace/adjust if you have better one suported by actual data. + dictionary_column_->Reserve(static_cast(ceil(sqrt(static_cast(new_cap))))); + index_column_->Reserve(new_cap + 2); // + 1 for null item (at pos 0), + 1 for default item (at pos 1) } void ColumnLowCardinality::Setup(ColumnRef dictionary_column) { diff --git a/ut/block_ut.cpp b/ut/block_ut.cpp index 3828cb65..29c1b230 100644 --- a/ut/block_ut.cpp +++ b/ut/block_ut.cpp @@ -1,9 +1,16 @@ #include -#include "readonly_client_test.h" -#include "connection_failed_client_test.h" +#include +#include + +#include "clickhouse/columns/column.h" +#include "clickhouse/columns/lowcardinality.h" +#include "gtest/gtest-message.h" + +#include "ut/utils_comparison.h" #include "utils.h" #include +#include namespace { using namespace clickhouse; @@ -11,11 +18,20 @@ using namespace clickhouse; Block MakeBlock(std::vector> columns) { Block result; + const size_t number_of_rows = columns.size() ? columns[0].second->Size() : 0; + size_t i = 0; for (const auto & name_and_col : columns) { + EXPECT_EQ(number_of_rows, name_and_col.second->Size()) + << "Column #" << i << " " << name_and_col.first << " has incorrect number of rows"; + result.AppendColumn(name_and_col.first, name_and_col.second); + + ++i; } result.RefreshRowCount(); + EXPECT_EQ(number_of_rows, result.GetRowCount()); + return result; } @@ -83,3 +99,83 @@ TEST(BlockTest, Iterators) { ASSERT_NE(block.cbegin(), block.cend()); } +TEST(BlockTest, Clear) { + // Test that Block::Clear removes all rows from all of the columns, + // without changing column instances, types, names, etc. + + auto block = MakeBlock({ + {"foo", std::make_shared(std::vector{1, 2, 3, 4, 5})}, + {"bar", std::make_shared(std::vector{"1", "2", "3", "4", "5"})}, + }); + + std::vector> expected_columns_description; + for (const auto & c : block) { + expected_columns_description.emplace_back(c.Name(), c.Column().get()); + } + + block.Clear(); + + // Block must report empty after being cleared + EXPECT_EQ(0u, block.GetRowCount()); + + size_t i = 0; + for (const auto & c : block) { + const auto & [expected_name, expected_column] = expected_columns_description[i]; + SCOPED_TRACE(testing::Message("col #") << c.ColumnIndex() << " \"" << c.Name() << "\""); + + // MUST be same column object + EXPECT_EQ(expected_column, c.Column().get()); + + // MUST have same column name + EXPECT_EQ(expected_name, c.Name()); + + // column MUST be empty + EXPECT_EQ(0u, c.Column()->Size()); + + ++i; + } +} + +TEST(BlockTest, Reserve) { + // Test that Block::Reserve reserves space in all columns (uncheckable now), + // without changing column instances, names, and previously stored rows. + + auto block = MakeBlock({ + {"foo", std::make_shared(std::vector{1, 2, 3, 4, 5})}, + {"bar", std::make_shared(std::vector{"1", "2", "3", "4", "5"})}, + {"quix", std::make_shared>(std::vector{"1", "2", "3", "4", "5"})}, + }); + + const size_t initial_rows_count = block.GetRowCount(); + + std::vector> expected_columns_description; + for (const auto & c : block) { + expected_columns_description.emplace_back( + c.Name(), + c.Column().get(), // reference to the actual object + c.Column()->Slice(0, c.Column()->Size()) // reference to the values + ); + } + + block.Reserve(1000); // 1000 is arbitrary value + + // Block must same number of rows as before Reserve + EXPECT_EQ(initial_rows_count, block.GetRowCount()); + + size_t i = 0; + for (const auto & c : block) { + const auto & [expected_name, expected_column, expected_values] = expected_columns_description[i]; + SCOPED_TRACE(testing::Message("col #") << c.ColumnIndex() << " \"" << c.Name() << "\""); + + // MUST have same column name + EXPECT_EQ(expected_name, c.Name()); + + // MUST be same column object + EXPECT_EQ(expected_column, c.Column().get()); + + // column MUST have the same values + EXPECT_TRUE(CompareRecursive(*expected_values, *c.Column())); + + ++i; + } +} diff --git a/ut/utils.cpp b/ut/utils.cpp index bfa4872e..e2219dcd 100644 --- a/ut/utils.cpp +++ b/ut/utils.cpp @@ -19,9 +19,15 @@ #include // for ipv4-ipv6 platform-specific stuff #include +#include +#include #include #include +#include #include +#include "clickhouse/types/types.h" +#include "absl/numeric/int128.h" + namespace { @@ -42,7 +48,7 @@ struct DateTimeValue { }; std::ostream& operator<<(std::ostream & ostr, const DateTimeValue & time) { - const auto t = std::localtime(&time.value); + const auto t = std::gmtime(&time.value); char buffer[] = "2015-05-18 07:40:12\0\0"; std::strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S", t); @@ -173,6 +179,7 @@ std::ostream & printColumnValue(const ColumnRef& c, const size_t row, std::ostre || doPrintValue(c, row, ostr) || doPrintValue(c, row, ostr) || doPrintValue(c, row, ostr) + || doPrintValue(c, row, ostr) || doPrintValue(c, row, ostr) || doPrintValue(c, row, ostr) || doPrintValue(c, row, ostr) @@ -332,6 +339,141 @@ std::ostream & operator<<(std::ostream & ostr, const Progress & progress) { << " written_bytes : " << progress.written_bytes; } +std::ostream& operator<<(std::ostream& ostr, const ItemView& item_view) { + ostr << "ItemView {" << clickhouse::Type::TypeName(item_view.type) << " : "; + + switch (item_view.type) { + case Type::Void: + ostr << "--void--"; + break; + case Type::Int8: + ostr << static_cast(item_view.get()); + break; + case Type::Int16: + ostr << static_cast(item_view.get()); + break; + case Type::Int32: + ostr << static_cast(item_view.get()); + break; + case Type::Int64: + ostr << item_view.get(); + break; + case Type::UInt8: + ostr << static_cast(item_view.get()); + break; + case Type::UInt16: + ostr << static_cast(item_view.get()); + break; + case Type::UInt32: + ostr << static_cast(item_view.get()); + break; + case Type::UInt64: + ostr << item_view.get(); + break; + case Type::Float32: + ostr << static_cast(item_view.get()); + break; + case Type::Float64: + ostr << static_cast(item_view.get()); + break; + case Type::String: + case Type::FixedString: + ostr << "\"" << item_view.data << "\" (" << item_view.data.size() << " bytes)"; + break; + case Type::Date: + ostr << DateTimeValue(item_view.get() * 86400); + break; + case Type::Date32: + ostr << DateTimeValue(item_view.get()); + break; + case Type::DateTime: + ostr << DateTimeValue(item_view.get()); + break; + case Type::DateTime64: { + if (item_view.data.size() == sizeof(int32_t)) { + ostr << DateTimeValue(item_view.get()); + } + else if (item_view.data.size() == sizeof(int64_t)) { + ostr << DateTimeValue(item_view.get()); + } + else if (item_view.data.size() == sizeof(Int128)) { + ostr << DateTimeValue(item_view.get()); + } + else { + throw std::runtime_error("Invalid data size of ItemView of type DateTime64"); + } + break; + } + case Type::Enum8: + ostr << static_cast(item_view.get()); + break; + case Type::Enum16: + ostr << static_cast(item_view.get()); + break; + case Type::UUID: { + const auto & uuid_vals = reinterpret_cast(item_view.data.data()); + ostr << ToString(clickhouse::UUID{uuid_vals[0], uuid_vals[1]}); + break; + } + case Type::IPv4: { + in_addr addr; + addr.s_addr = ntohl(item_view.get()); + ostr << addr; + break; + } + case Type::IPv6: + ostr << *reinterpret_cast(item_view.AsBinaryData().data()); + break; + case Type::Int128: + ostr << item_view.get(); + break; + case Type::UInt128: + ostr << item_view.get(); + break; + case Type::Decimal: { + if (item_view.data.size() == sizeof(int32_t)) { + ostr << item_view.get(); + } + else if (item_view.data.size() == sizeof(int64_t)) { + ostr << item_view.get(); + } + else if (item_view.data.size() == sizeof(Int128)) { + ostr << item_view.get(); + } + else { + throw std::runtime_error("Invalid data size of ItemView of type Decimal"); + } + } + break; + case Type::Decimal32: + ostr << DateTimeValue(item_view.get()); + break; + case Type::Decimal64: + ostr << DateTimeValue(item_view.get()); + break; + case Type::Decimal128: + ostr << DateTimeValue(item_view.get()); + break; + // Unsupported types. i.e. there shouldn't be `ItemView`s of those types in practice. + // either because GetItem() is not implemented for corresponding column type + // OR this type code is never used, for `ItemView`s (but type code of wrapped column is). + case Type::LowCardinality: + case Type::Array: + case Type::Nullable: + case Type::Tuple: + case Type::Map: + case Type::Point: + case Type::Ring: + case Type::Polygon: + case Type::MultiPolygon: { + throw std::runtime_error("Invalid data size of ItemView of type " + std::string(Type::TypeName(item_view.type))); + } + }; + + return ostr << "}"; +} + + } uint64_t versionNumber(const ServerInfo & server_info) { diff --git a/ut/utils.h b/ut/utils.h index 99216743..d9eb5614 100644 --- a/ut/utils.h +++ b/ut/utils.h @@ -3,6 +3,7 @@ #include #include +#include "clickhouse/columns/itemview.h" #include "clickhouse/query.h" #include "utils_meta.h" #include "utils_comparison.h" @@ -144,6 +145,7 @@ std::ostream& operator<<(std::ostream & ostr, const Type & type); std::ostream & operator<<(std::ostream & ostr, const ServerInfo & server_info); std::ostream & operator<<(std::ostream & ostr, const Profile & profile); std::ostream & operator<<(std::ostream & ostr, const Progress & progress); +std::ostream & operator<<(std::ostream& ostr, const ItemView& item_view); } std::ostream& operator<<(std::ostream & ostr, const PrettyPrintBlock & block); diff --git a/ut/utils_comparison.h b/ut/utils_comparison.h index e500e4f3..5c9d0b18 100644 --- a/ut/utils_comparison.h +++ b/ut/utils_comparison.h @@ -14,6 +14,8 @@ namespace clickhouse { class Block; class Column; + + std::ostream& operator<<(std::ostream& ostr, const ItemView& item_view); } inline bool operator==(const in6_addr& left, const in6_addr& right) { @@ -104,6 +106,72 @@ struct ColumnAsContainerWrapper { return Iterator{nested_col, nested_col.Size()}; } }; + +// Helper to allow comparing values of two instances of clickhouse::Column, when concrete type is unknown. +// Comparison is done by comparing result of Column::GetItem(). +template <> +struct ColumnAsContainerWrapper { + const clickhouse::Column& nested_col; + + struct Iterator { + const clickhouse::Column& nested_col; + size_t i = 0; + + auto& operator++() { + ++i; + return *this; + } + + struct ItemWrapper { + const clickhouse::ItemView item_view; + + bool operator==(const ItemWrapper & other) const { + // type-erased comparison, byte-by-byte + return item_view.type == other.item_view.type + && item_view.data == other.item_view.data; + } + + template + bool operator==(const U & other) const { + return item_view.get() == other; + } + + template + bool operator!=(const U & other) const { + return !(*this == other); + } + + friend std::ostream& operator<<(std::ostream& ostr, const ItemWrapper& val) { + return ostr << val.item_view; + } + }; + + auto operator*() const { + return ItemWrapper{nested_col.GetItem(i)}; + } + + bool operator==(const Iterator & other) const { + return &other.nested_col == &this->nested_col && other.i == this->i; + } + + bool operator!=(const Iterator & other) const { + return !(other == *this); + } + }; + + size_t size() const { + return nested_col.Size(); + } + + auto begin() const { + return Iterator{nested_col, 0}; + } + + auto end() const { + return Iterator{nested_col, nested_col.Size()}; + } +}; + } template diff --git a/ut/utils_ut.cpp b/ut/utils_ut.cpp index bd015751..cb6cb55a 100644 --- a/ut/utils_ut.cpp +++ b/ut/utils_ut.cpp @@ -1,9 +1,19 @@ #include +#include "clickhouse/base/uuid.h" +#include "clickhouse/columns/date.h" +#include "clickhouse/columns/decimal.h" +#include "clickhouse/columns/enum.h" +#include "clickhouse/columns/ip4.h" +#include "clickhouse/columns/numeric.h" +#include "clickhouse/columns/string.h" +#include "clickhouse/columns/uuid.h" #include "ut/value_generators.h" #include "utils.h" +#include "absl/numeric/int128.h" #include #include +#include #include TEST(CompareRecursive, CompareValues) { @@ -120,3 +130,107 @@ TEST(Generators, MakeArrays) { auto arrays = MakeArrays(); ASSERT_LT(0u, arrays.size()); } + +// I.e. object ItemView can be serialized to string +std::string toString(const clickhouse::ItemView & iv) { + std::stringstream sstr; + sstr << iv; + + return sstr.str(); +} + +#define STRINGIFY_HELPER(x) #x +#define STRINGIFY(x) STRINGIFY_HELPER(x) + +#define EXPECTED_SERIALIZATION(expected, column_expression, value) \ +do {\ + auto column = column_expression; \ + column.Append((value)); \ + EXPECT_EQ("ItemView {" expected "}", toString(column.GetItem(0))) \ + << "Created from " << STRINGIFY((value)); \ +}\ +while (false) + +TEST(ItemView, OutputToOstream_VALID) { + // Testing output of `std::ostream& operator<<(std::ostream& ostr, const ItemView& item_view)` + // result must match predefined value. + + using namespace clickhouse; + + // Positive cases: output should be generated + EXPECTED_SERIALIZATION("String : \"string\" (6 bytes)", ColumnString(), "string"); + EXPECTED_SERIALIZATION("FixedString : \"string\" (6 bytes)", ColumnFixedString(6), "string"); + + EXPECTED_SERIALIZATION("Int8 : -123", ColumnInt8(), -123); + EXPECTED_SERIALIZATION("Int16 : -1234", ColumnInt16(), -1234); + EXPECTED_SERIALIZATION("Int32 : -12345", ColumnInt32(), -12345); + EXPECTED_SERIALIZATION("Int64 : -123456", ColumnInt64(), -123456); + EXPECTED_SERIALIZATION("Int128 : -123456789", ColumnInt128(), absl::MakeInt128(-1, -123456789ll)); + + EXPECTED_SERIALIZATION("UInt8 : 123", ColumnUInt8(), 123); + EXPECTED_SERIALIZATION("UInt16 : 1234", ColumnUInt16(), 1234); + EXPECTED_SERIALIZATION("UInt32 : 12345", ColumnUInt32(), 12345); + EXPECTED_SERIALIZATION("UInt64 : 1234567", ColumnUInt64(), 1234567); + EXPECTED_SERIALIZATION("UInt128 : 123456789", ColumnUInt128(), absl::MakeUint128(0, 123456789ll)); + + EXPECTED_SERIALIZATION("Float32 : 1", ColumnFloat32(), 1); + EXPECTED_SERIALIZATION("Float64 : 4", ColumnFloat64(), 4); + + using EnumItem = Type::EnumItem; + + EXPECTED_SERIALIZATION("Enum8 : 123", ColumnEnum8(Type::CreateEnum8({EnumItem{"one", 123}})), 123); + EXPECTED_SERIALIZATION("Enum16 : 12345", ColumnEnum16(Type::CreateEnum16({EnumItem{"one", 12345}})), 12345); + + EXPECTED_SERIALIZATION("IPv4 : 127.0.0.1", ColumnIPv4(), "127.0.0.1"); + EXPECTED_SERIALIZATION("IPv6 : ::ffff:204.152.189.116", ColumnIPv6(), "::ffff:204.152.189.116"); + + EXPECTED_SERIALIZATION("UUID : bb6a8c69-9ab2-414c-8669-7b7fd27f0825", ColumnUUID(), UUID(0xbb6a8c699ab2414cllu, 0x86697b7fd27f0825llu)); + + EXPECTED_SERIALIZATION("Decimal : 1234567", ColumnDecimal(6, 0), 1234567); + EXPECTED_SERIALIZATION("Decimal : 1234567", ColumnDecimal(6, 3), 1234567); + EXPECTED_SERIALIZATION("Decimal : 1234567", ColumnDecimal(12, 0), 1234567); + EXPECTED_SERIALIZATION("Decimal : 1234567", ColumnDecimal(12, 6), 1234567); + EXPECTED_SERIALIZATION("Decimal : 1234567", ColumnDecimal(18, 0), 1234567); + EXPECTED_SERIALIZATION("Decimal : 1234567", ColumnDecimal(18, 9), 1234567); + + EXPECTED_SERIALIZATION("Date : 1970-05-04 00:00:00", ColumnDate(), time_t(123) * 86400); + EXPECTED_SERIALIZATION("DateTime : 1970-01-15 06:56:07", ColumnDateTime(), 1234567); + // this is completely bogus, since precision is not taken into account + EXPECTED_SERIALIZATION("DateTime64 : 1970-01-15 06:56:07", ColumnDateTime64(3), 1234567); +#if defined(_unix_) + // These tests do not work on Windows, and since we test here auxiliary functionality + // (not used by clients, but only in tests), I assume it is safe to just ignore the failure. + EXPECTED_SERIALIZATION("DateTime64 : 1969-12-17 17:03:53", ColumnDateTime64(3), -1234567); + + { + auto column = ColumnDate32(); + column.AppendRaw(-123); + EXPECT_EQ("ItemView {Date32 : 1969-12-31 23:57:57}", toString(column.GetItem(0))); + } + // EXPECTED_SERIALIZATION("Date32 : 1969-08-31 00:00:00", ColumnDate32(), time_t(-123) * 86400); +#endif +} + +namespace { + +clickhouse::ItemView MakeEmptyItemView(clickhouse::Type::Code type_code) { + return clickhouse::ItemView(type_code, std::string_view()); +} + +} + +TEST(ItemView, OutputToOstream_negative) { + using namespace clickhouse; + + // Doesn't matter what content we point ItemView into, those types are not supported. + EXPECT_ANY_THROW(toString(MakeEmptyItemView(Type::LowCardinality))); + EXPECT_ANY_THROW(toString(MakeEmptyItemView(Type::Array))); + EXPECT_ANY_THROW(toString(MakeEmptyItemView(Type::Nullable))); + EXPECT_ANY_THROW(toString(MakeEmptyItemView(Type::Tuple))); + EXPECT_ANY_THROW(toString(MakeEmptyItemView(Type::Map))); + EXPECT_ANY_THROW(toString(MakeEmptyItemView(Type::Point))); + EXPECT_ANY_THROW(toString(MakeEmptyItemView(Type::Ring))); + EXPECT_ANY_THROW(toString(MakeEmptyItemView(Type::Polygon))); + EXPECT_ANY_THROW(toString(MakeEmptyItemView(Type::MultiPolygon))); + +}