diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp
index e26b478d87f92a..5d226f734a6e0d 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -965,7 +965,7 @@ Status MapFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr
                                              bool* has_null) {
     if (_reading_flag == ReadingFlag::SKIP_READING) {
         DLOG(INFO) << "Map column iterator column " << _column_name << " skip reading.";
-        dst->resize(dst->size() + *n);
+        dst->insert_many_defaults(*n);
         return Status::OK();
     }
 
@@ -1025,7 +1025,7 @@ Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t
                                                  vectorized::MutableColumnPtr& dst) {
     if (_reading_flag == ReadingFlag::SKIP_READING) {
-        DLOG(INFO) << "File column iterator column " << _column_name << " skip reading.";
-        dst->resize(count);
+        DLOG(INFO) << "Map column iterator column " << _column_name << " skip reading.";
+        dst->insert_many_defaults(count);
         return Status::OK();
     }
     if (count == 0) {
@@ -1259,7 +1259,7 @@ Status StructFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumn
                                                 bool* has_null) {
     if (_reading_flag == ReadingFlag::SKIP_READING) {
         DLOG(INFO) << "Struct column iterator column " << _column_name << " skip reading.";
-        dst->resize(dst->size() + *n);
+        dst->insert_many_defaults(*n);
         return Status::OK();
     }
 
@@ -1317,7 +1317,7 @@ Status StructFileColumnIterator::read_by_rowids(const rowid_t* rowids, const siz
                                                     vectorized::MutableColumnPtr& dst) {
     if (_reading_flag == ReadingFlag::SKIP_READING) {
         DLOG(INFO) << "Struct column iterator column " << _column_name << " skip reading.";
-        dst->resize(count);
+        dst->insert_many_defaults(count);
         return Status::OK();
     }
 
@@ -1519,7 +1519,7 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnP
                                                bool* has_null) {
     if (_reading_flag == ReadingFlag::SKIP_READING) {
         DLOG(INFO) << "Array column iterator column " << _column_name << " skip reading.";
-        dst->resize(dst->size() + *n);
+        dst->insert_many_defaults(*n);
         return Status::OK();
     }
 
@@ -1573,7 +1573,7 @@ Status ArrayFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size
                                                    vectorized::MutableColumnPtr& dst) {
     if (_reading_flag == ReadingFlag::SKIP_READING) {
         DLOG(INFO) << "Array column iterator column " << _column_name << " skip reading.";
-        dst->resize(count);
+        dst->insert_many_defaults(count);
         return Status::OK();
     }
 
@@ -1736,7 +1736,7 @@ Status FileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& d
                                       bool* has_null) {
     if (_reading_flag == ReadingFlag::SKIP_READING) {
         DLOG(INFO) << "File column iterator column " << _column_name << " skip reading.";
-        dst->resize(dst->size() + *n);
+        dst->insert_many_defaults(*n);
         return Status::OK();
     }
 
@@ -1798,7 +1798,7 @@ Status FileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t co
                                           vectorized::MutableColumnPtr& dst) {
     if (_reading_flag == ReadingFlag::SKIP_READING) {
         DLOG(INFO) << "File column iterator column " << _column_name << " skip reading.";
-        dst->resize(count);
+        dst->insert_many_defaults(count);
         return Status::OK();
     }
 
diff --git a/be/src/vec/columns/column.cpp b/be/src/vec/columns/column.cpp
index de79eb0468c0ad..f6f595fc6cdc47 100644
--- a/be/src/vec/columns/column.cpp
+++ b/be/src/vec/columns/column.cpp
@@ -43,6 +43,67 @@ std::string IColumn::dump_structure() const {
     return res.str();
 }
 
+int IColumn::count_const_column() const {
+    int count = is_column_const(*this) ? 1 : 0;
+    ColumnCallback callback = [&](ColumnPtr& subcolumn) {
+        count += subcolumn->count_const_column();
+    };
+    // for_each_subcolumn is non-const, but the callback only reads, so the const_cast is safe.
+    const_cast<IColumn*>(this)->for_each_subcolumn(callback);
+    return count;
+}
+
+bool IColumn::const_nested_check() const {
+    auto const_cnt = count_const_column();
+    if (const_cnt == 0) {
+        return true;
+    }
+    // A const column must not be nested; it may only be the outermost (top-level) column.
+    return const_cnt == 1 && is_column_const(*this);
+}
+
+bool IColumn::null_map_check() const {
+    auto check_null_map_is_zero_or_one = [&](const IColumn& subcolumn) {
+        if (is_column_nullable(subcolumn)) {
+            const auto& nullable_col = assert_cast<const ColumnNullable&>(subcolumn);
+            const auto& null_map = nullable_col.get_null_map_data();
+            for (size_t i = 0; i < null_map.size(); ++i) {
+                if (null_map[i] != 0 && null_map[i] != 1) {
+                    LOG_WARNING("null map check failed at index {} with value {}", i, null_map[i])
+                            .tag("column structure", subcolumn.dump_structure());
+                    return false;
+                }
+            }
+        }
+        return true;
+    };
+
+    bool is_valid = check_null_map_is_zero_or_one(*this);
+    ColumnCallback callback = [&](ColumnPtr& subcolumn) {
+        if (!subcolumn->null_map_check()) {
+            is_valid = false;
+        }
+    };
+    // for_each_subcolumn is non-const, but the callback only reads, so the const_cast is safe.
+    const_cast<IColumn*>(this)->for_each_subcolumn(callback);
+    return is_valid;
+}
+
+Status IColumn::column_self_check() const {
+#ifndef NDEBUG
+    // A const column may only appear as the outermost column.
+    if (!const_nested_check()) {
+        return Status::InternalError("const nested check failed for column: {} , {}", get_name(),
+                                     dump_structure());
+    }
+    // Every null map entry must be 0 or 1.
+    if (!null_map_check()) {
+        return Status::InternalError("null map check failed for column: {}", get_name());
+    }
+#endif
+    return Status::OK();
+}
+
 void IColumn::insert_from(const IColumn& src, size_t n) {
     insert(src[n]);
 }
diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index e414f71be56b6e..985fb19276e4b1 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -647,6 +647,20 @@ class IColumn : public COW<IColumn> {
      */
     String dump_structure() const;
 
+    // Counts the const columns in this column and all of its subcolumns, including itself.
+    int count_const_column() const;
+
+    // Returns true iff every null map in this column and its subcolumns holds only 0 or 1.
+    bool null_map_check() const;
+
+    // Returns true iff a const column, if present, is only the outermost column,
+    // e.g. const(nullable(...)) is allowed but const(array(const(...))) is not.
+    bool const_nested_check() const;
+
+    // Runs const_nested_check() and null_map_check(); returns InternalError if either fails.
+    // Both checks are compiled out when NDEBUG is defined, so such builds always return OK.
+    Status column_self_check() const;
+
     // only used in agg value replace for column which is not variable length, eg.BlockReader::_copy_value_data
     // usage: self_column.replace_column_data(other_column, other_column's row index, self_column's row index)
     virtual void replace_column_data(const IColumn&, size_t row, size_t self_row = 0) = 0;
diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
index cfff732a141380..7bbebe8249568b 100644
--- a/be/src/vec/core/block.cpp
+++ b/be/src/vec/core/block.cpp
@@ -288,6 +288,7 @@ Status Block::check_type_and_column() const {
         const auto& type = elem.type;
         const auto& column = elem.column;
 
+        RETURN_IF_ERROR(column->column_self_check());
         auto st = type->check_column(*column);
         if (!st.ok()) {
             return Status::InternalError(
diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp b/be/src/vec/exprs/vectorized_fn_call.cpp
index 6219130f45058a..213096cfb9d39b 100644
--- a/be/src/vec/exprs/vectorized_fn_call.cpp
+++ b/be/src/vec/exprs/vectorized_fn_call.cpp
@@ -244,6 +244,7 @@ Status VectorizedFnCall::_do_execute(VExprContext* context, const Block* block,
                                        num_columns_without_result, count));
     result_column = temp_block.get_by_position(num_columns_without_result).column;
     DCHECK_EQ(result_column->size(), count);
+    RETURN_IF_ERROR(result_column->column_self_check());
     return Status::OK();
 }
 
diff --git a/be/test/olap/rowset/segment_v2/column_reader_test.cpp b/be/test/olap/rowset/segment_v2/column_reader_test.cpp
index 04a8f8317a5d92..eaecb4eac4fc96 100644
--- a/be/test/olap/rowset/segment_v2/column_reader_test.cpp
+++ b/be/test/olap/rowset/segment_v2/column_reader_test.cpp
@@ -291,7 +291,6 @@ TEST_F(ColumnReaderTest, MapReadByRowidsSkipReadingResizesDestination) {
     auto keys = vectorized::ColumnInt32::create();
     auto values = vectorized::ColumnInt32::create();
     auto offsets = vectorized::ColumnArray::ColumnOffsets::create();
-    offsets->get_data().push_back(0);
     auto column_map = vectorized::ColumnMap::create(std::move(keys), std::move(values),
                                                     std::move(offsets));
     vectorized::MutableColumnPtr dst = std::move(column_map);
diff --git a/be/test/vec/columns/column_self_check.cpp b/be/test/vec/columns/column_self_check.cpp
new file mode 100644
index 00000000000000..13fcfcbf5f0907
--- /dev/null
+++ b/be/test/vec/columns/column_self_check.cpp
@@ -0,0 +1,139 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "runtime/primitive_type.h"
+#include "testutil/column_helper.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_array.h"
+#include "vec/columns/column_const.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/function.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+// NOTE(review): the DataTypeInt32 / DataTypeArray template arguments used below are assumed;
+// confirm them against ColumnHelper and the intended column types.
+
+// Exercises IColumn::const_nested_check(): a const column may only be the outermost column.
+TEST(ColumnSelfCheckTest, const_check_test) {
+    // A column that contains no const column passes.
+    {
+        ColumnPtr col = ColumnHelper::create_column<DataTypeInt32>({1, 2, 3});
+        EXPECT_TRUE(col->const_nested_check());
+    }
+
+    // A single const column at the top level is allowed.
+    {
+        ColumnPtr col = ColumnHelper::create_column<DataTypeInt32>({1});
+        ColumnPtr const_col = ColumnConst::create(col, 3);
+        EXPECT_TRUE(const_col->const_nested_check());
+    }
+
+    // const(array(const(...))): a const column below the top level is rejected.
+    {
+        ColumnPtr col = ColumnHelper::create_column<DataTypeInt32>({});
+
+        auto array_const_col = ColumnArray::create(col);
+
+        array_const_col->data = ColumnConst::create(col, 3, true);
+
+        auto const_array = ColumnConst::create(std::move(array_const_col), 2, true);
+
+        EXPECT_GT(const_array->count_const_column(), 1);
+
+        EXPECT_FALSE(const_array->const_nested_check());
+    }
+
+#ifndef NDEBUG
+    // IColumn::column_self_check() compiles its checks out under NDEBUG, so the rejection
+    // through Block::check_type_and_column() is only asserted when NDEBUG is not defined.
+    {
+        ColumnPtr col = ColumnHelper::create_column<DataTypeInt32>({});
+
+        auto array_const_col = ColumnArray::create(col);
+
+        array_const_col->data = ColumnConst::create(col, 3, true);
+
+        auto const_array = ColumnConst::create(std::move(array_const_col), 2, true);
+
+        Block block;
+        block.insert({std::move(const_array),
+                      std::make_shared<DataTypeArray>(std::make_shared<DataTypeInt32>()),
+                      "array_col"});
+
+        EXPECT_FALSE(block.check_type_and_column().ok());
+    }
+#endif
+}
+
+// Exercises IColumn::null_map_check(): every null map entry must be 0 or 1.
+TEST(ColumnSelfCheckTest, nullmap_check_test) {
+    // A column created without a null map passes.
+    {
+        auto col = ColumnHelper::create_column<DataTypeInt32>({1, 2, 3});
+        EXPECT_TRUE(col->null_map_check());
+    }
+
+    {
+        auto col = ColumnHelper::create_nullable_column<DataTypeInt32>({1, 2, 3}, {0, 0, 0});
+        EXPECT_TRUE(col->null_map_check());
+    }
+
+    {
+        auto col = ColumnHelper::create_nullable_column<DataTypeInt32>({1, 2, 3}, {0, 1, 0});
+        EXPECT_TRUE(col->null_map_check());
+    }
+
+    // A null map entry other than 0 or 1 is invalid.
+    {
+        auto col = ColumnHelper::create_nullable_column<DataTypeInt32>({1, 2, 3}, {0, 2, 0});
+        EXPECT_FALSE(col->null_map_check());
+    }
+
+    // The invalid entry is expected to be detected through a wrapping const column as well.
+    {
+        auto col = ColumnHelper::create_nullable_column<DataTypeInt32>(
+                {
+                        1,
+                },
+                {3});
+
+        auto col_const = ColumnConst::create(col, 2);
+
+        EXPECT_FALSE(col_const->null_map_check());
+    }
+
+#ifndef NDEBUG
+    // IColumn::column_self_check() always returns OK under NDEBUG, so assert only without it.
+    {
+        auto col = ColumnHelper::create_nullable_column<DataTypeInt32>(
+                {
+                        1,
+                },
+                {3});
+
+        auto col_const = ColumnConst::create(col, 2);
+
+        EXPECT_FALSE(col_const->column_self_check().ok());
+    }
+#endif
+}
+
+} // namespace doris::vectorized